# - Pass all registered tools to LLM during chat completion
# - Handle tool_calls from LLM response
# - Execute tools and feed results back to LLM
# - Loop until LLM returns final response
# - Updated system prompt to encourage tool use
# - Updated streaming to handle tool calls
# - Increased MAX_TOOL_ITERATIONS to 5
465 lines
14 KiB
Python
Executable File
465 lines
14 KiB
Python
Executable File
"""
|
|
Scientific/Academic Tool - Search scientific papers and research
|
|
|
|
Free sources used:
|
|
- arXiv API (completely free, no key required)
|
|
- Semantic Scholar API (free tier)
|
|
- DOAJ (Directory of Open Access Journals - free)
|
|
- CORE API (free access to research papers)
|
|
|
|
All APIs are free for basic use.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
import xml.etree.ElementTree as ET
|
|
|
|
import requests
|
|
|
|
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Free academic APIs
# The arXiv endpoint is addressed over HTTPS so queries and responses are not
# sent in clear text; host and path are otherwise unchanged.
ARXIV_API = "https://export.arxiv.org/api/query"
SEMANTIC_SCHOLAR_API = "https://api.semanticscholar.org/graph/v1"
DOAJ_API = "https://api.doaj.org"
|
|
|
|
|
|
def _atom_text(parent, tag, ns):
    """Return the stripped text of child *tag* of *parent*, or "" if absent or empty."""
    node = parent.find(tag, ns)
    # ``node.text`` is None for an empty element such as ``<title/>``, so it
    # has to be checked separately from the element itself.
    if node is None or node.text is None:
        return ""
    return node.text.strip()


def science_search_arxiv(
    query: str,
    max_results: int = 10,
    category: Optional[str] = None,
) -> dict:
    """
    Search arXiv for scientific preprints.

    Args:
        query: Search query
        max_results: Maximum number of results (default: 10)
        category: arXiv category filter (e.g., cs.AI, physics, math.CO)

    Returns:
        Dictionary with arXiv search results. On success it holds
        ``success`` (True), ``source``, ``query``, ``category``, ``results``
        and ``count``. On failure (blank query, HTTP error, unparsable
        response, ...) it holds ``success`` (False), ``error`` and ``source``.
        No exception is raised to the caller.
    """
    # Reject a blank query up front instead of sending a malformed request.
    if not isinstance(query, str) or not query.strip():
        return {
            "success": False,
            "error": "query must be a non-empty string",
            "source": "arxiv",
        }

    try:
        # Build search query, optionally restricted to one category.
        search_query = f"cat:{category} AND {query}" if category else query

        params = {
            "search_query": search_query,
            "start": 0,
            "max_results": max_results,
            "sortBy": "relevance",
            "sortOrder": "descending",
        }

        response = requests.get(ARXIV_API, params=params, timeout=30)
        response.raise_for_status()

        # The response is an Atom feed; every element lives in this namespace.
        root = ET.fromstring(response.content)
        ns = {"atom": "http://www.w3.org/2005/Atom"}

        results = []
        for entry in root.findall("atom:entry", ns):
            author_names = (
                _atom_text(author, "atom:name", ns)
                for author in entry.findall("atom:author", ns)
            )
            authors = [name for name in author_names if name]

            categories = [
                cat.get("term")
                for cat in entry.findall("atom:category", ns)
                if cat.get("term")
            ]

            # The entry id is the abstract page URL; the PDF URL is derived
            # from it by swapping the path component.
            link = _atom_text(entry, "atom:id", ns)

            results.append({
                "title": _atom_text(entry, "atom:title", ns),
                "abstract": _atom_text(entry, "atom:summary", ns)[:1000],
                "authors": authors,
                "published": _atom_text(entry, "atom:published", ns),
                "updated": _atom_text(entry, "atom:updated", ns),
                "link": link,
                "pdf_link": link.replace("/abs/", "/pdf/"),
                "categories": categories,
            })

        return {
            "success": True,
            "source": "arxiv",
            "query": query,
            "category": category,
            "results": results,
            "count": len(results),
        }

    except Exception as e:
        # Tool boundary: report the failure as data rather than raising.
        log.error(f"arXiv search failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "source": "arxiv",
        }
|
|
|
|
|
|
def science_search_semantic_scholar(
    query: str,
    limit: int = 10,
    year: Optional[str] = None,
) -> dict:
    """
    Search Semantic Scholar for academic papers.

    Args:
        query: Search query
        limit: Maximum number of results (default: 10)
        year: Year filter (e.g., "2020-", "2018-2022")

    Returns:
        Dictionary with Semantic Scholar results. On success it holds
        ``success`` (True), ``source``, ``query``, ``year_filter``,
        ``results``, ``count`` and ``total``. On failure it holds
        ``success`` (False), ``error`` and ``source``. No exception is
        raised to the caller.
    """
    # Reject a blank query up front; no request is needed to know it fails.
    if not isinstance(query, str) or not query.strip():
        return {
            "success": False,
            "error": "query must be a non-empty string",
            "source": "semantic_scholar",
        }

    try:
        url = f"{SEMANTIC_SCHOLAR_API}/paper/search"
        params = {
            "query": query,
            "limit": limit,
            "fields": "title,abstract,authors,year,venue,citationCount,openAccessPdf,url",
        }
        if year:
            params["year"] = year

        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()
        data = response.json()

        # ``x.get(key) or default`` is used instead of ``x.get(key, default)``
        # because a key that is present with a JSON null comes back as None,
        # which the two-argument form does not replace.
        results = []
        for paper in data.get("data") or []:
            pdf_info = paper.get("openAccessPdf") or {}
            results.append({
                "paper_id": paper.get("paperId"),
                "title": paper.get("title") or "",
                "abstract": (paper.get("abstract") or "")[:1000],
                "authors": [a.get("name") or "" for a in paper.get("authors") or []],
                "year": paper.get("year"),
                "venue": paper.get("venue") or "",
                "citations": paper.get("citationCount") or 0,
                "url": paper.get("url"),
                "pdf_url": pdf_info.get("url"),
            })

        return {
            "success": True,
            "source": "semantic_scholar",
            "query": query,
            "year_filter": year,
            "results": results,
            "count": len(results),
            "total": data.get("total", len(results)),
        }

    except Exception as e:
        # Tool boundary: report the failure as data rather than raising.
        log.error(f"Semantic Scholar search failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "source": "semantic_scholar",
        }
|
|
|
|
|
|
def science_get_paper_details(
    paper_id: str,
) -> dict:
    """
    Get detailed information about a paper from Semantic Scholar.

    Args:
        paper_id: Semantic Scholar paper ID or DOI

    Returns:
        Dictionary with paper details. On success it holds ``success``
        (True), ``source``, ``paper_id``, ``title``, ``abstract``,
        ``authors``, ``year``, ``venue``, ``journal``, ``citations``,
        ``references``, ``url``, ``pdf_url`` and ``tldr``. On failure it
        holds ``success`` (False), ``error`` and ``source``. No exception
        is raised to the caller.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import quote

    # A blank identifier would otherwise request the bare ``/paper/`` path.
    if not isinstance(paper_id, str) or not paper_id.strip():
        return {
            "success": False,
            "error": "paper_id must be a non-empty string",
            "source": "semantic_scholar",
        }

    try:
        # Percent-encode the identifier so characters such as "?" or "#"
        # cannot cut it short or turn part of it into a query string.
        # "/" and ":" stay literal: DOIs contain "/", and prefixed
        # identifiers such as "DOI:..." contain ":".
        encoded_id = quote(paper_id.strip(), safe=":/")
        url = f"{SEMANTIC_SCHOLAR_API}/paper/{encoded_id}"
        params = {
            "fields": "title,abstract,authors,year,venue,citationCount,referenceCount,openAccessPdf,url,journal,publicationVenue,tldr",
        }

        response = requests.get(url, params=params, timeout=15)
        response.raise_for_status()
        paper = response.json()

        # ``or`` fallbacks cover keys that are present but JSON null.
        pdf_info = paper.get("openAccessPdf") or {}
        tldr_info = paper.get("tldr") or {}
        journal_info = paper.get("journal") or {}

        return {
            "success": True,
            "source": "semantic_scholar",
            "paper_id": paper.get("paperId"),
            "title": paper.get("title") or "",
            "abstract": paper.get("abstract") or "",
            "authors": [a.get("name") or "" for a in paper.get("authors") or []],
            "year": paper.get("year"),
            "venue": paper.get("venue") or "",
            "journal": journal_info.get("name"),
            "citations": paper.get("citationCount") or 0,
            "references": paper.get("referenceCount") or 0,
            "url": paper.get("url"),
            "pdf_url": pdf_info.get("url"),
            "tldr": tldr_info.get("text"),
        }

    except Exception as e:
        # Tool boundary: report the failure as data rather than raising.
        log.error(f"Paper details fetch failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "source": "semantic_scholar",
        }
|
|
|
|
|
|
def science_search_doaj(
    query: str,
    limit: int = 10,
) -> dict:
    """
    Search DOAJ (Directory of Open Access Journals).

    Args:
        query: Search query
        limit: Maximum number of results (default: 10)

    Returns:
        Dictionary with DOAJ results. On success it holds ``success``
        (True), ``source``, ``query``, ``results``, ``count`` and ``total``.
        On failure it holds ``success`` (False), ``error`` and ``source``.
        No exception is raised to the caller.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import quote

    # A blank query would otherwise request the bare ``/search/articles/`` path.
    if not isinstance(query, str) or not query.strip():
        return {
            "success": False,
            "error": "query must be a non-empty string",
            "source": "doaj",
        }

    try:
        # The query is a URL path segment here, so every reserved character
        # (including "/", "?" and "#") must be percent-encoded.
        url = f"{DOAJ_API}/search/articles/{quote(query, safe='')}"
        params = {
            "pageSize": limit,
            "page": 1,
        }
        headers = {"Accept": "application/json"}

        response = requests.get(url, params=params, headers=headers, timeout=15)
        response.raise_for_status()
        data = response.json()

        results = []
        for article in data.get("results") or []:
            bibjson = article.get("bibjson") or {}

            # ``identifier`` is a list of typed ids; pick the entry whose
            # type is "doi" rather than whichever happens to come first.
            identifiers = bibjson.get("identifier") or []
            doi = next(
                (
                    ident.get("id")
                    for ident in identifiers
                    if (ident.get("type") or "").lower() == "doi"
                ),
                None,
            )

            links = bibjson.get("link") or []

            results.append({
                "title": bibjson.get("title") or "",
                "abstract": (bibjson.get("abstract") or "")[:1000],
                "authors": [a.get("name") or "" for a in bibjson.get("author") or []],
                "year": bibjson.get("year"),
                "journal": (bibjson.get("journal") or {}).get("title", ""),
                "doi": doi,
                "link": links[0].get("url") if links else None,
                "keywords": bibjson.get("keywords") or [],
            })

        return {
            "success": True,
            "source": "doaj",
            "query": query,
            "results": results,
            "count": len(results),
            "total": data.get("total", len(results)),
        }

    except Exception as e:
        # Tool boundary: report the failure as data rather than raising.
        log.error(f"DOAJ search failed: {e}")
        return {
            "success": False,
            "error": str(e),
            "source": "doaj",
        }
|
|
|
|
|
|
def science_aggregate_search(
    query: str,
    limit: int = 5,
) -> dict:
    """
    Search multiple academic sources at once.

    Each source is queried in turn; a failing source is recorded in
    ``errors`` and does not stop the remaining sources.

    Args:
        query: Search query
        limit: Maximum results per source (default: 5)

    Returns:
        Dictionary with aggregated results from multiple sources. It holds
        ``success``, ``query``, ``results`` (each tagged with its
        ``source``), ``count``, ``sources_checked`` and ``errors`` (a list
        of messages, or None when every source succeeded). ``success`` is
        False only when every source failed; in that case an ``error`` key
        with the joined messages is added.
    """
    # (label used in error messages, source tag on results, search function)
    sources = (
        ("arXiv", "arxiv", science_search_arxiv),
        ("Semantic Scholar", "semantic_scholar", science_search_semantic_scholar),
    )

    results = []
    errors = []
    for label, source, search in sources:
        outcome = search(query, limit)
        if outcome.get("success"):
            results.extend({**r, "source": source} for r in outcome.get("results", []))
        else:
            errors.append(f"{label}: {outcome.get('error')}")

    # Partial results still count as success; only a total failure is
    # reported as one.
    all_failed = len(errors) == len(sources)

    aggregated = {
        "success": not all_failed,
        "query": query,
        "results": results,
        "count": len(results),
        "sources_checked": [source for _, source, _ in sources],
        "errors": errors if errors else None,
    }
    if all_failed:
        aggregated["error"] = "; ".join(errors)
    return aggregated
|
|
|
|
|
|
# Tool schemas for OpenAI function calling
|
|
# Function-calling tool schema describing ``science_search_arxiv``.
SCIENCE_SEARCH_ARXIV_SCHEMA = {
    "type": "function",
    "function": {
        "name": "science_search_arxiv",
        "description": (
            "Search arXiv for scientific preprints. "
            "Best for physics, math, computer science, and AI research."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 10)",
                    "default": 10,
                },
                "category": {
                    "type": "string",
                    "description": "arXiv category filter (e.g., cs.AI, cs.LG, physics, math.CO)",
                },
            },
            # Only the query is mandatory; the rest fall back to defaults.
            "required": ["query"],
        },
    },
}
|
|
|
|
# Function-calling tool schema describing ``science_search_semantic_scholar``.
SCIENCE_SEARCH_SEMANTIC_SCHOLAR_SCHEMA = {
    "type": "function",
    "function": {
        "name": "science_search_semantic_scholar",
        "description": (
            "Search Semantic Scholar for academic papers across all fields. "
            "Includes citation counts and open access PDFs."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 10)",
                    "default": 10,
                },
                "year": {
                    "type": "string",
                    "description": "Year filter (e.g., '2020-', '2018-2022')",
                },
            },
            # Only the query is mandatory; the rest fall back to defaults.
            "required": ["query"],
        },
    },
}
|
|
|
|
# Function-calling tool schema describing ``science_get_paper_details``.
SCIENCE_GET_PAPER_DETAILS_SCHEMA = {
    "type": "function",
    "function": {
        "name": "science_get_paper_details",
        "description": (
            "Get detailed information about a specific paper including TLDR summary. "
            "Use paper ID from search results."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "paper_id": {
                    "type": "string",
                    "description": "Semantic Scholar paper ID or DOI",
                },
            },
            "required": ["paper_id"],
        },
    },
}
|
|
|
|
# Function-calling tool schema describing ``science_search_doaj``.
SCIENCE_SEARCH_DOAJ_SCHEMA = {
    "type": "function",
    "function": {
        "name": "science_search_doaj",
        "description": (
            "Search DOAJ for open access journal articles. "
            "Best for peer-reviewed open access research."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 10)",
                    "default": 10,
                },
            },
            # Only the query is mandatory; ``limit`` falls back to its default.
            "required": ["query"],
        },
    },
}
|
|
|
|
# Function-calling tool schema describing ``science_aggregate_search``.
SCIENCE_AGGREGATE_SEARCH_SCHEMA = {
    "type": "function",
    "function": {
        "name": "science_aggregate_search",
        "description": (
            "Search multiple academic sources (arXiv, Semantic Scholar) "
            "at once for comprehensive coverage."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "limit": {
                    "type": "integer",
                    "description": "Maximum results per source (default: 5)",
                    "default": 5,
                },
            },
            # Only the query is mandatory; ``limit`` falls back to its default.
            "required": ["query"],
        },
    },
}
|