docrag/tools/wikipedia_tool.py
Z User b811162f78 Implement tool calling loop for LLM
- Pass all registered tools to LLM during chat completion
- Handle tool_calls from LLM response
- Execute tools and feed results back to LLM
- Loop until LLM returns final response
- Updated system prompt to encourage tool use
- Updated streaming to handle tool calls
- Increased MAX_TOOL_ITERATIONS to 5
2026-03-29 16:07:56 +00:00

260 lines
7.1 KiB
Python
Executable File

"""
Wikipedia Tool - Search and retrieve Wikipedia articles
Free API with no authentication required.
Rate limit: Be respectful, no strict limits.
"""
from __future__ import annotations

import html
import logging
import re
from typing import Optional

import requests
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Endpoint (api.php on en.wikipedia.org) that the functions below send their GET requests to.
# NOTE(review): the requests in this module set no User-Agent header; Wikimedia's
# User-Agent policy asks API clients to send a descriptive one — confirm whether
# the default python-requests UA is acceptable for this deployment.
WIKIPEDIA_API = "https://en.wikipedia.org/w/api.php"
# Matches any HTML tag. Search snippets are HTML, so a literal "<" in the
# article text arrives entity-encoded and is never mistaken for a tag here.
_SNIPPET_TAG_RE = re.compile(r"<[^>]+>")


def _clean_snippet(snippet: str) -> str:
    """Return an HTML search snippet as plain text.

    Tags are removed first and entities decoded afterwards, so that
    entity-encoded angle brackets in the article text survive as literal
    characters instead of being stripped as markup.
    """
    return html.unescape(_SNIPPET_TAG_RE.sub("", snippet))


def wikipedia_search(
    query: str,
    limit: int = 5,
) -> dict:
    """
    Search Wikipedia for articles matching the query.

    Args:
        query: Search query
        limit: Maximum number of results (default: 5)

    Returns:
        On success, a dictionary with ``success=True``, ``source``,
        ``query``, ``results`` (a list of dicts with ``title``, ``pageid``,
        ``snippet``, ``wordcount`` and ``url``) and ``count``.
        On failure, a dictionary with ``success=False``, ``error`` and
        ``source``. This function does not raise.
    """
    try:
        params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "srlimit": limit,
            "format": "json",
            "utf8": 1,
        }
        response = requests.get(WIKIPEDIA_API, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        # The API reports its own failures (bad parameters, etc.) in an
        # "error" object with HTTP status 200, so raise_for_status() does
        # not catch them. Surface them instead of returning zero results.
        api_error = data.get("error")
        if api_error:
            message = (
                api_error.get("info")
                or api_error.get("code")
                or "Unknown Wikipedia API error"
            )
            log.error("Wikipedia search failed: %s", message)
            return {
                "success": False,
                "error": message,
                "source": "wikipedia",
            }

        results = [
            {
                "title": item.get("title", ""),
                "pageid": item.get("pageid", 0),
                "snippet": _clean_snippet(item.get("snippet", "")),
                "wordcount": item.get("wordcount", 0),
                "url": f"https://en.wikipedia.org/?curid={item.get('pageid', 0)}",
            }
            for item in data.get("query", {}).get("search", [])
        ]
        return {
            "success": True,
            "source": "wikipedia",
            "query": query,
            "results": results,
            "count": len(results),
        }
    except Exception as e:
        # Deliberately broad: the result is handed back to the tool-calling
        # loop, which expects a dict rather than an exception.
        log.error("Wikipedia search failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "wikipedia",
        }
def wikipedia_get_article(
    title: str,
    sentences: int = 10,
) -> dict:
    """
    Get the plain-text introduction of a Wikipedia article.

    Args:
        title: Article title. Redirects are followed. The value is sent as
            a title, so a numeric page id is looked up as a title, not as
            an id.
        sentences: Number of sentences to request (default: 10). Values
            are clamped to the range 1-50 before being sent.

    Returns:
        On success, a dictionary with ``success=True``, ``source``,
        ``articles`` (a list of dicts with ``title``, ``pageid``,
        ``extract`` and ``url``) and ``count``.
        On failure (request error, API error, or no existing page), a
        dictionary with ``success=False``, ``error`` and ``source``.
        This function does not raise.
    """
    try:
        params = {
            "action": "query",
            "prop": "extracts",
            # NOTE(review): the TextExtracts documentation gives the valid
            # range of exsentences as 1-10 — confirm whether values above
            # 10 have any effect before relying on the 50 cap.
            "exsentences": max(1, min(sentences, 50)),
            "exintro": True,
            "explaintext": True,
            "titles": title,
            "format": "json",
            "utf8": 1,
            "redirects": 1,
        }
        response = requests.get(WIKIPEDIA_API, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        # API-level failures come back with HTTP 200 and an "error" object;
        # report them as such rather than as "Article not found".
        api_error = data.get("error")
        if api_error:
            message = (
                api_error.get("info")
                or api_error.get("code")
                or "Unknown Wikipedia API error"
            )
            log.error("Wikipedia article fetch failed: %s", message)
            return {
                "success": False,
                "error": message,
                "source": "wikipedia",
            }

        pages = data.get("query", {}).get("pages", {})
        articles = []
        for page_id, page_data in pages.items():
            # Pages that do not exist are keyed by negative placeholder ids
            # ("-1", "-2", ...) and flagged "missing" or "invalid". Checking
            # only for "-1" lets the other placeholders through.
            if (
                str(page_id).startswith("-")
                or "missing" in page_data
                or "invalid" in page_data
            ):
                continue
            articles.append({
                "title": page_data.get("title", ""),
                "pageid": page_id,
                "extract": page_data.get("extract", ""),
                "url": f"https://en.wikipedia.org/?curid={page_id}",
            })

        if not articles:
            return {
                "success": False,
                "error": f"Article not found: {title}",
                "source": "wikipedia",
            }
        return {
            "success": True,
            "source": "wikipedia",
            "articles": articles,
            "count": len(articles),
        }
    except Exception as e:
        # Deliberately broad: the result is handed back to the tool-calling
        # loop, which expects a dict rather than an exception.
        log.error("Wikipedia article fetch failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "wikipedia",
        }
def wikipedia_get_full_article(
    title: str,
) -> dict:
    """
    Get the full plain-text content of a Wikipedia article.

    Args:
        title: Article title. Redirects are followed.

    Returns:
        On success, a dictionary with ``success=True``, ``source``,
        ``title``, ``pageid``, ``content`` and ``url`` for the first
        existing page in the response.
        On failure (request error, API error, or no existing page), a
        dictionary with ``success=False``, ``error`` and ``source``.
        This function does not raise.
    """
    try:
        params = {
            "action": "query",
            "prop": "extracts",
            "explaintext": True,
            "titles": title,
            "format": "json",
            "utf8": 1,
            "redirects": 1,
        }
        # Longer timeout than the other Wikipedia calls in this module:
        # the whole article body is requested, not just the introduction.
        response = requests.get(WIKIPEDIA_API, params=params, timeout=15)
        response.raise_for_status()
        data = response.json()

        # API-level failures come back with HTTP 200 and an "error" object;
        # report them as such rather than as "Article not found".
        api_error = data.get("error")
        if api_error:
            message = (
                api_error.get("info")
                or api_error.get("code")
                or "Unknown Wikipedia API error"
            )
            log.error("Wikipedia full article fetch failed: %s", message)
            return {
                "success": False,
                "error": message,
                "source": "wikipedia",
            }

        pages = data.get("query", {}).get("pages", {})
        for page_id, page_data in pages.items():
            # Pages that do not exist are keyed by negative placeholder ids
            # ("-1", "-2", ...) and flagged "missing" or "invalid". Checking
            # only for "-1" lets the other placeholders through.
            if (
                str(page_id).startswith("-")
                or "missing" in page_data
                or "invalid" in page_data
            ):
                continue
            return {
                "success": True,
                "source": "wikipedia",
                "title": page_data.get("title", ""),
                "pageid": page_id,
                "content": page_data.get("extract", ""),
                "url": f"https://en.wikipedia.org/?curid={page_id}",
            }

        return {
            "success": False,
            "error": f"Article not found: {title}",
            "source": "wikipedia",
        }
    except Exception as e:
        # Deliberately broad: the result is handed back to the tool-calling
        # loop, which expects a dict rather than an exception.
        log.error("Wikipedia full article fetch failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "wikipedia",
        }
# Tool schemas for OpenAI function calling

# Describes `wikipedia_search`: "query" is mandatory, "limit" is optional.
WIKIPEDIA_SEARCH_SCHEMA = {
    "type": "function",
    "function": {
        "name": "wikipedia_search",
        "description": (
            "Search Wikipedia for articles matching a query. "
            "Returns a list of article titles and snippets."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "The search query"},
                "limit": {
                    "type": "integer",
                    "description": (
                        "Maximum number of results to return (default: 5)"
                    ),
                    "default": 5,
                },
            },
            "required": ["query"],
        },
    },
}
# Function-calling schema for `wikipedia_get_article`: "title" is mandatory,
# "sentences" is optional.
WIKIPEDIA_GET_ARTICLE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "wikipedia_get_article",
        "description": (
            "Get the introduction/summary of a Wikipedia article. "
            "Use this after wikipedia_search to get more details."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "title": {
                    "type": "string",
                    "description": (
                        "The exact article title from search results"
                    ),
                },
                "sentences": {
                    "type": "integer",
                    "description": (
                        "Number of sentences to return (default: 10)"
                    ),
                    "default": 10,
                },
            },
            "required": ["title"],
        },
    },
}
# Function-calling schema for `wikipedia_get_full_article`: the only
# parameter, "title", is mandatory.
WIKIPEDIA_GET_FULL_ARTICLE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "wikipedia_get_full_article",
        "description": (
            "Get the full content of a Wikipedia article. "
            "Use for comprehensive research when the summary is not enough."
        ),
        "parameters": {
            "type": "object",
            "properties": {
                "title": {
                    "type": "string",
                    "description": "The exact article title",
                },
            },
            "required": ["title"],
        },
    },
}