Tools added: - Wikipedia: search, get article, get full article - News: Hacker News, Reddit, aggregated news search - Finance: stocks (yfinance), crypto (CoinGecko), exchange rates - Medical: PubMed, FDA, disease data, health topics - Weather: current, forecast, air quality (Open-Meteo) - Science: arXiv, Semantic Scholar, DOAJ - Web: DuckDuckGo search, instant answers, page content All tools use completely free APIs with no authentication required.
435 lines
13 KiB
Python
435 lines
13 KiB
Python
"""
|
|
News Tool - Fetch news from free sources
|
|
|
|
Free sources used:
|
|
- GNews API (free tier: 100 requests/day)
|
|
- Currents API (free tier: 200 requests/day)
|
|
- Hacker News (completely free)
|
|
- Reddit (free JSON feeds)
|
|
|
|
No API key required for Hacker News and Reddit.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from datetime import datetime, timedelta
|
|
from typing import Optional
|
|
|
|
import requests
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
# Base URLs of the news services referenced by this module.
# According to the module docstring, GNews and Currents are rate-limited
# free-tier APIs, while Hacker News and Reddit need no API key.
GNEWS_API = "https://gnews.io/api/v4"
CURRENTS_API = "https://api.currentsapi.services/v1"
HACKER_NEWS_API = "https://hacker-news.firebaseio.com/v0"
REDDIT_API = "https://www.reddit.com"
|
|
|
|
|
|
def news_search_hackernews(
    query: str,
    limit: int = 10,
) -> dict:
    """
    Search Hacker News stories through the public Algolia search endpoint.

    Args:
        query: Search query
        limit: Maximum number of results (default: 10). A value of zero or
            less yields an empty successful result without any request.

    Returns:
        On success, a dictionary with ``success`` (True), ``source``,
        ``query``, ``results`` and ``count``. Each result carries ``title``,
        ``url``, ``points``, ``author``, ``created_at``, ``comments`` and
        ``hn_link``. On failure, a dictionary with ``success`` (False),
        ``error`` and ``source``; exceptions are logged, not raised.
    """
    try:
        max_hits = int(limit)
        if max_hits <= 0:
            # Nothing was asked for, so skip the network round-trip.
            return {
                "success": True,
                "source": "hacker_news",
                "query": query,
                "results": [],
                "count": 0,
            }

        # Use Hacker News Algolia API for search (free, no key)
        response = requests.get(
            "https://hn.algolia.com/api/v1/search",
            params={"query": query, "hitsPerPage": max_hits, "tags": "story"},
            timeout=10,
        )
        response.raise_for_status()
        hits = response.json().get("hits") or []

        results = []
        for hit in hits:
            # ``.get(key, default)`` only covers missing keys; a JSON null
            # (e.g. the URL of a text-only post) would come through as None,
            # so fall back with ``or`` instead.
            story_id = hit.get("objectID") or ""
            results.append({
                "title": hit.get("title") or "",
                "url": hit.get("url") or "",
                "points": hit.get("points") or 0,
                "author": hit.get("author") or "",
                "created_at": hit.get("created_at") or "",
                "comments": hit.get("num_comments") or 0,
                "hn_link": f"https://news.ycombinator.com/item?id={story_id}",
            })

        return {
            "success": True,
            "source": "hacker_news",
            "query": query,
            "results": results,
            "count": len(results),
        }

    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        log.error("Hacker News search failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "hacker_news",
        }
|
|
|
|
|
|
def news_get_top_stories(
    limit: int = 15,
) -> dict:
    """
    Get top stories from Hacker News.

    The list of top story IDs is fetched first, then each story is fetched
    individually. A story that cannot be fetched or parsed is logged and
    skipped so one bad item does not fail the whole call.

    Args:
        limit: Maximum number of stories (default: 15). A value of zero or
            less yields an empty successful result without any request.

    Returns:
        On success, a dictionary with ``success`` (True), ``source``,
        ``results`` and ``count``. Each result carries ``title``, ``url``,
        ``points``, ``author``, ``time`` (ISO 8601, UTC), ``comments`` and
        ``hn_link``. On failure, a dictionary with ``success`` (False),
        ``error`` and ``source``; exceptions are logged, not raised.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    from datetime import timezone

    try:
        max_stories = int(limit)
        if max_stories <= 0:
            # A negative bound used as ``[:limit]`` would mean "all but the
            # last N" and trigger hundreds of item requests.
            return {
                "success": True,
                "source": "hacker_news",
                "results": [],
                "count": 0,
            }

        # Get top story IDs
        response = requests.get(f"{HACKER_NEWS_API}/topstories.json", timeout=10)
        response.raise_for_status()
        story_ids = (response.json() or [])[:max_stories]

        results = []
        for story_id in story_ids:
            try:
                story_response = requests.get(
                    f"{HACKER_NEWS_API}/item/{story_id}.json",
                    timeout=10,
                )
                story_response.raise_for_status()
                story = story_response.json()

                # The item endpoint can answer with a JSON null.
                if not story:
                    continue

                # Render the epoch value as an explicit UTC timestamp so the
                # output does not depend on the host's local timezone.
                posted_at = datetime.fromtimestamp(
                    story.get("time") or 0, tz=timezone.utc
                )
                results.append({
                    "title": story.get("title") or "",
                    "url": story.get("url") or "",
                    "points": story.get("score") or 0,
                    "author": story.get("by") or "",
                    "time": posted_at.isoformat(),
                    "comments": story.get("descendants") or 0,
                    "hn_link": f"https://news.ycombinator.com/item?id={story_id}",
                })
            except Exception as item_error:
                # Best effort: keep going, but leave a trace of what was lost.
                log.warning(
                    "Skipping Hacker News item %s: %s", story_id, item_error
                )
                continue

        return {
            "success": True,
            "source": "hacker_news",
            "results": results,
            "count": len(results),
        }

    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        log.error("Hacker News top stories failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "hacker_news",
        }
|
|
|
|
|
|
def news_get_reddit(
    subreddit: str = "worldnews",
    limit: int = 15,
    timeframe: str = "day",
) -> dict:
    """
    Get top posts from a Reddit subreddit via its public JSON feed.

    Args:
        subreddit: Subreddit name (default: worldnews)
        limit: Maximum number of posts (default: 15). A value of zero or
            less yields an empty successful result without any request.
        timeframe: Time period (hour, day, week, month, year, all)

    Returns:
        On success, a dictionary with ``success`` (True), ``source``,
        ``subreddit``, ``timeframe``, ``results`` and ``count``. Each result
        carries ``title``, ``url``, ``author``, ``score``, ``comments``,
        ``subreddit``, ``created`` (ISO 8601, UTC), ``permalink`` and
        ``selftext`` (truncated to 500 characters). On failure, a dictionary
        with ``success`` (False), ``error`` and ``source``; exceptions are
        logged, not raised.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    from datetime import timezone

    try:
        max_posts = int(limit)
        if max_posts <= 0:
            # Nothing was asked for, so skip the network round-trip.
            return {
                "success": True,
                "source": "reddit",
                "subreddit": subreddit,
                "timeframe": timeframe,
                "results": [],
                "count": 0,
            }

        # Reddit provides free JSON feeds
        response = requests.get(
            f"{REDDIT_API}/r/{subreddit}/top.json",
            headers={"User-Agent": "DocRAG/1.0"},
            params={"limit": max_posts, "t": timeframe},
            timeout=10,
        )
        response.raise_for_status()
        listing = response.json().get("data") or {}

        results = []
        for child in listing.get("children") or []:
            post = child.get("data") or {}
            # ``created_utc`` is an epoch value; attach UTC explicitly so the
            # rendered string does not depend on the host's local timezone.
            created_at = datetime.fromtimestamp(
                post.get("created_utc") or 0, tz=timezone.utc
            )
            # ``or`` fallbacks also cover JSON nulls, which a ``.get``
            # default does not.
            permalink = post.get("permalink") or ""
            results.append({
                "title": post.get("title") or "",
                "url": post.get("url") or "",
                "author": post.get("author") or "",
                "score": post.get("score") or 0,
                "comments": post.get("num_comments") or 0,
                "subreddit": post.get("subreddit") or "",
                "created": created_at.isoformat(),
                "permalink": f"https://reddit.com{permalink}",
                "selftext": (post.get("selftext") or "")[:500],
            })

        return {
            "success": True,
            "source": "reddit",
            "subreddit": subreddit,
            "timeframe": timeframe,
            "results": results,
            "count": len(results),
        }

    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        log.error("Reddit fetch failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "reddit",
        }
|
|
|
|
|
|
def news_search_reddit(
    query: str,
    subreddit: str = "all",
    limit: int = 15,
) -> dict:
    """
    Search Reddit for posts matching a query.

    Args:
        query: Search query
        subreddit: Subreddit to search (default: all). The search is
            restricted to the subreddit unless it is "all".
        limit: Maximum number of results (default: 15). A value of zero or
            less yields an empty successful result without any request.

    Returns:
        On success, a dictionary with ``success`` (True), ``source``,
        ``query``, ``subreddit``, ``results`` and ``count``. Each result
        carries ``title``, ``url``, ``author``, ``score``, ``comments``,
        ``subreddit``, ``created`` (ISO 8601, UTC), ``permalink`` and
        ``selftext`` (truncated to 500 characters). On failure, a dictionary
        with ``success`` (False), ``error`` and ``source``; exceptions are
        logged, not raised.
    """
    # Local import keeps this function self-contained with respect to the
    # module's import block.
    from datetime import timezone

    try:
        max_posts = int(limit)
        if max_posts <= 0:
            # Nothing was asked for, so skip the network round-trip.
            return {
                "success": True,
                "source": "reddit",
                "query": query,
                "subreddit": subreddit,
                "results": [],
                "count": 0,
            }

        response = requests.get(
            f"{REDDIT_API}/r/{subreddit}/search.json",
            headers={"User-Agent": "DocRAG/1.0"},
            params={
                "q": query,
                "limit": max_posts,
                "sort": "relevance",
                "restrict_sr": "true" if subreddit != "all" else "false",
            },
            timeout=10,
        )
        response.raise_for_status()
        listing = response.json().get("data") or {}

        results = []
        for child in listing.get("children") or []:
            post = child.get("data") or {}
            # ``created_utc`` is an epoch value; attach UTC explicitly so the
            # rendered string does not depend on the host's local timezone.
            created_at = datetime.fromtimestamp(
                post.get("created_utc") or 0, tz=timezone.utc
            )
            # ``or`` fallbacks also cover JSON nulls, which a ``.get``
            # default does not.
            permalink = post.get("permalink") or ""
            results.append({
                "title": post.get("title") or "",
                "url": post.get("url") or "",
                "author": post.get("author") or "",
                "score": post.get("score") or 0,
                "comments": post.get("num_comments") or 0,
                "subreddit": post.get("subreddit") or "",
                "created": created_at.isoformat(),
                "permalink": f"https://reddit.com{permalink}",
                "selftext": (post.get("selftext") or "")[:500],
            })

        return {
            "success": True,
            "source": "reddit",
            "query": query,
            "subreddit": subreddit,
            "results": results,
            "count": len(results),
        }

    except Exception as e:
        # Tool boundary: report the failure as data instead of raising.
        log.error("Reddit search failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "reddit",
        }
|
|
|
|
|
|
def news_aggregate(
    query: str,
    limit: int = 10,
) -> dict:
    """
    Aggregate news from multiple free sources.

    Runs the Hacker News search and the Reddit search (across all
    subreddits) for the same query and merges their results, tagging every
    item with the source it came from.

    Args:
        query: Search query
        limit: Maximum results per source (default: 10)

    Returns:
        Dictionary with ``success``, ``query``, ``results``, ``count``,
        ``sources_checked`` and ``errors`` (list of per-source messages, or
        None when every source succeeded). ``success`` is True when at least
        one source answered; when every source failed it is False and an
        ``error`` string summarising the failures is included.
    """
    # (result tag, label used in error messages, zero-argument fetcher)
    sources = (
        ("hacker_news", "Hacker News", lambda: news_search_hackernews(query, limit)),
        ("reddit", "Reddit", lambda: news_search_reddit(query, "all", limit)),
    )

    results = []
    errors = []
    for source_key, label, fetch in sources:
        outcome = fetch()
        if outcome.get("success"):
            results.extend(
                {**item, "source": source_key}
                for item in outcome.get("results", [])
            )
        else:
            errors.append(f"{label}: {outcome.get('error')}")

    # A partial failure still returns useful data; only a total failure
    # counts as an unsuccessful call.
    any_source_ok = len(errors) < len(sources)

    aggregated = {
        "success": any_source_ok,
        "query": query,
        "results": results,
        "count": len(results),
        "sources_checked": [source_key for source_key, _, _ in sources],
        "errors": errors if errors else None,
    }
    if not any_source_ok:
        # Mirror the failure shape of the single-source functions.
        aggregated["error"] = "; ".join(errors)
    return aggregated
|
|
|
|
|
|
# Tool schemas for OpenAI function calling
|
|
# Function-calling schema describing news_search_hackernews():
# required `query`, optional `limit`.
NEWS_SEARCH_HACKERNEWS_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_search_hackernews",
        "description": "Search Hacker News for tech news and discussions. Best for technology, startups, programming topics.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 10)",
                    "default": 10,
                },
            },
            "required": ["query"],
        },
    },
}
|
|
|
|
# Function-calling schema describing news_get_top_stories():
# a single optional `limit` parameter.
NEWS_GET_TOP_STORIES_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_get_top_stories",
        "description": "Get current top stories from Hacker News. Use for general tech news browsing.",
        "parameters": {
            "type": "object",
            "properties": {
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of stories (default: 15)",
                    "default": 15,
                },
            },
            "required": [],
        },
    },
}
|
|
|
|
# Function-calling schema describing news_get_reddit():
# all parameters optional; `timeframe` is limited to a fixed set of values.
NEWS_GET_REDDIT_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_get_reddit",
        "description": "Get top posts from a Reddit subreddit. Great for news, discussions, and community content.",
        "parameters": {
            "type": "object",
            "properties": {
                "subreddit": {
                    "type": "string",
                    "description": "Subreddit name (e.g., worldnews, technology, science)",
                    "default": "worldnews",
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of posts (default: 15)",
                    "default": 15,
                },
                "timeframe": {
                    "type": "string",
                    "description": "Time period (hour, day, week, month, year, all)",
                    "default": "day",
                    "enum": ["hour", "day", "week", "month", "year", "all"],
                },
            },
            "required": [],
        },
    },
}
|
|
|
|
# Function-calling schema describing news_search_reddit():
# required `query`, optional `subreddit` and `limit`.
NEWS_SEARCH_REDDIT_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_search_reddit",
        "description": "Search Reddit for posts matching a query across all subreddits.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "subreddit": {
                    "type": "string",
                    "description": "Subreddit to search (default: all)",
                    "default": "all",
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 15)",
                    "default": 15,
                },
            },
            "required": ["query"],
        },
    },
}
|
|
|
|
# Function-calling schema describing news_aggregate():
# required `query`, optional per-source `limit`.
NEWS_AGGREGATE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_aggregate",
        "description": "Search for news from multiple sources (Hacker News, Reddit) in one call. Best for comprehensive news coverage.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "Search query"},
                "limit": {
                    "type": "integer",
                    "description": "Maximum results per source (default: 10)",
                    "default": 10,
                },
            },
            "required": ["query"],
        },
    },
}
|