"""
News Tool - Fetch news from free sources

Free sources used:
- GNews API (free tier: 100 requests/day)
- Currents API (free tier: 200 requests/day)
- Hacker News (completely free)
- Reddit (free JSON feeds)

No API key required for Hacker News and Reddit.

Every public ``news_*`` function returns a plain ``dict`` and reports
failures through ``{"success": False, "error": ...}`` instead of raising,
so the functions can be handed directly to a tool-calling loop.
"""

from __future__ import annotations

import logging
from datetime import datetime, timedelta, timezone
from typing import Optional
from urllib.parse import quote

import requests

log = logging.getLogger(__name__)

# Free news APIs (no key required for some)
GNEWS_API = "https://gnews.io/api/v4"
CURRENTS_API = "https://api.currentsapi.services/v1"
HACKER_NEWS_API = "https://hacker-news.firebaseio.com/v0"
REDDIT_API = "https://www.reddit.com"

# Hacker News full-text search is served by Algolia (free, no key).
_HN_SEARCH_URL = "https://hn.algolia.com/api/v1/search"
# Seconds to wait on any single HTTP request.
_REQUEST_TIMEOUT = 10
# Sent with every Reddit request.
_REDDIT_HEADERS = {"User-Agent": "DocRAG/1.0"}


def _get_json(url: str, *, params: dict | None = None, headers: dict | None = None):
    """GET *url* and return the decoded JSON body, raising on HTTP errors."""
    response = requests.get(
        url, params=params, headers=headers, timeout=_REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response.json()


def _utc_iso(epoch_seconds: float | None) -> str:
    """Render a Unix timestamp as a timezone-aware ISO-8601 string in UTC.

    A naive ``datetime.fromtimestamp(ts)`` would render the value in the
    host's local timezone, so the same post would get a different string on
    different machines.
    """
    return datetime.fromtimestamp(epoch_seconds or 0, tz=timezone.utc).isoformat()


def _failure(source: str, action: str, exc: Exception) -> dict:
    """Log a failed *action* and build the standard error payload."""
    log.error("%s failed: %s", action, exc)
    return {
        "success": False,
        "error": str(exc),
        "source": source,
    }


def _subreddit_path(subreddit: str) -> str:
    """Return *subreddit* normalised and percent-encoded for use in a URL path.

    Surrounding whitespace and slashes and a leading ``r/`` are dropped.
    ``/``, ``?`` and ``#`` are escaped so the value cannot change the
    request path or query; ``+`` is left as-is so combined names such as
    ``news+worldnews`` pass through unchanged.
    """
    name = subreddit.strip().strip("/")
    if name.lower().startswith("r/"):
        name = name[2:]
    return quote(name, safe="+")


def _parse_reddit_post(post: dict) -> dict:
    """Reduce one Reddit post payload to the fields this tool exposes."""
    return {
        "title": post.get("title", ""),
        "url": post.get("url", ""),
        "author": post.get("author", ""),
        "score": post.get("score", 0),
        "comments": post.get("num_comments", 0),
        "subreddit": post.get("subreddit", ""),
        "created": _utc_iso(post.get("created_utc", 0)),
        "permalink": f"https://reddit.com{post.get('permalink', '')}",
        # Self-text is capped at 500 characters to keep payloads small.
        "selftext": (post.get("selftext") or "")[:500],
    }


def _parse_reddit_listing(data: dict) -> list:
    """Extract every post from a Reddit listing response."""
    children = data.get("data", {}).get("children", [])
    return [_parse_reddit_post(child.get("data", {})) for child in children]


def news_search_hackernews(
    query: str,
    limit: int = 10,
) -> dict:
    """
    Search Hacker News for stories.

    Args:
        query: Search query
        limit: Maximum number of results (default: 10)

    Returns:
        Dictionary with search results. On failure the dictionary has
        ``success`` set to False and an ``error`` message.
    """
    try:
        data = _get_json(
            _HN_SEARCH_URL,
            params={
                "query": query,
                "hitsPerPage": limit,
                "tags": "story",
            },
        )

        # ``or`` (rather than a .get default) also covers fields that are
        # present but null in the JSON.
        results = [
            {
                "title": hit.get("title") or "",
                "url": hit.get("url") or "",
                "points": hit.get("points") or 0,
                "author": hit.get("author") or "",
                "created_at": hit.get("created_at") or "",
                "comments": hit.get("num_comments") or 0,
                "hn_link": f"https://news.ycombinator.com/item?id={hit.get('objectID', '')}",
            }
            for hit in data.get("hits", [])
        ]

        return {
            "success": True,
            "source": "hacker_news",
            "query": query,
            "results": results,
            "count": len(results),
        }

    except Exception as e:  # tool boundary: report the failure, never raise
        return _failure("hacker_news", "Hacker News search", e)


def news_get_top_stories(
    limit: int = 15,
) -> dict:
    """
    Get top stories from Hacker News.

    Args:
        limit: Maximum number of stories (default: 15). Values of zero or
            less yield an empty result without contacting the API.

    Returns:
        Dictionary with top stories. Stories whose details cannot be
        fetched or parsed are skipped (and logged), so ``count`` may be
        lower than ``limit``.
    """
    # A negative slice bound would mean "all but the last N" IDs and fan out
    # into hundreds of item requests, so non-positive limits stop here.
    if limit <= 0:
        return {
            "success": True,
            "source": "hacker_news",
            "results": [],
            "count": 0,
        }

    try:
        story_ids = _get_json(f"{HACKER_NEWS_API}/topstories.json")[:limit]

        results = []
        for story_id in story_ids:
            # Best effort: one bad item must not fail the whole listing.
            try:
                story = _get_json(f"{HACKER_NEWS_API}/item/{story_id}.json")
                if not story:
                    continue
                results.append({
                    "title": story.get("title", ""),
                    "url": story.get("url", ""),
                    "points": story.get("score", 0),
                    "author": story.get("by", ""),
                    "time": _utc_iso(story.get("time", 0)),
                    "comments": story.get("descendants", 0),
                    "hn_link": f"https://news.ycombinator.com/item?id={story_id}",
                })
            except Exception as item_error:
                log.warning(
                    "Skipping Hacker News item %s: %s", story_id, item_error
                )

        return {
            "success": True,
            "source": "hacker_news",
            "results": results,
            "count": len(results),
        }

    except Exception as e:  # tool boundary: report the failure, never raise
        return _failure("hacker_news", "Hacker News top stories", e)


def news_get_reddit(
    subreddit: str = "worldnews",
    limit: int = 15,
    timeframe: str = "day",
) -> dict:
    """
    Get top posts from a Reddit subreddit.

    Args:
        subreddit: Subreddit name (default: worldnews). A leading ``r/`` is
            accepted; the name is percent-encoded before being placed in
            the URL.
        limit: Maximum number of posts (default: 15)
        timeframe: Time period (hour, day, week, month, year, all)

    Returns:
        Dictionary with Reddit posts. On failure the dictionary has
        ``success`` set to False and an ``error`` message.
    """
    try:
        # Reddit provides free JSON feeds
        data = _get_json(
            f"{REDDIT_API}/r/{_subreddit_path(subreddit)}/top.json",
            headers=_REDDIT_HEADERS,
            params={
                "limit": limit,
                "t": timeframe,
            },
        )
        results = _parse_reddit_listing(data)

        return {
            "success": True,
            "source": "reddit",
            "subreddit": subreddit,
            "timeframe": timeframe,
            "results": results,
            "count": len(results),
        }

    except Exception as e:  # tool boundary: report the failure, never raise
        return _failure("reddit", "Reddit fetch", e)


def news_search_reddit(
    query: str,
    subreddit: str = "all",
    limit: int = 15,
) -> dict:
    """
    Search Reddit for posts matching a query.

    Args:
        query: Search query
        subreddit: Subreddit to search (default: all). A leading ``r/`` is
            accepted; the name is percent-encoded before being placed in
            the URL.
        limit: Maximum number of results (default: 15)

    Returns:
        Dictionary with search results. On failure the dictionary has
        ``success`` set to False and an ``error`` message.
    """
    try:
        name = _subreddit_path(subreddit)
        data = _get_json(
            f"{REDDIT_API}/r/{name}/search.json",
            headers=_REDDIT_HEADERS,
            params={
                "q": query,
                "limit": limit,
                "sort": "relevance",
                # Only restrict to the subreddit when a specific one is named.
                "restrict_sr": "true" if name.lower() != "all" else "false",
            },
        )
        results = _parse_reddit_listing(data)

        return {
            "success": True,
            "source": "reddit",
            "query": query,
            "subreddit": subreddit,
            "results": results,
            "count": len(results),
        }

    except Exception as e:  # tool boundary: report the failure, never raise
        return _failure("reddit", "Reddit search", e)


def news_aggregate(
    query: str,
    limit: int = 10,
) -> dict:
    """
    Aggregate news from multiple free sources.

    Args:
        query: Search query
        limit: Maximum results per source (default: 10)

    Returns:
        Dictionary with aggregated news from multiple sources. Each result
        carries a ``source`` key. ``errors`` lists per-source failures (or
        is None). ``success`` is False only when every source failed, in
        which case ``error`` holds the combined failure messages.
    """
    searches = (
        ("hacker_news", "Hacker News", lambda: news_search_hackernews(query, limit)),
        ("reddit", "Reddit", lambda: news_search_reddit(query, "all", limit)),
    )

    results = []
    errors = []
    for source, label, run_search in searches:
        outcome = run_search()
        if outcome.get("success"):
            results.extend(
                {**item, "source": source} for item in outcome.get("results", [])
            )
        else:
            errors.append(f"{label}: {outcome.get('error')}")

    # Partial results still count as success; a total outage does not.
    all_failed = len(errors) == len(searches)

    aggregated = {
        "success": not all_failed,
        "query": query,
        "results": results,
        "count": len(results),
        "sources_checked": [source for source, _, _ in searches],
        "errors": errors if errors else None,
    }
    if all_failed:
        # Same key the single-source tools use to report a failure.
        aggregated["error"] = "; ".join(errors)
    return aggregated


# Tool schemas for OpenAI function calling
NEWS_SEARCH_HACKERNEWS_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_search_hackernews",
        "description": "Search Hacker News for tech news and discussions. Best for technology, startups, programming topics.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query",
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 10)",
                    "default": 10,
                },
            },
            "required": ["query"],
        },
    },
}

NEWS_GET_TOP_STORIES_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_get_top_stories",
        "description": "Get current top stories from Hacker News. Use for general tech news browsing.",
        "parameters": {
            "type": "object",
            "properties": {
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of stories (default: 15)",
                    "default": 15,
                },
            },
            "required": [],
        },
    },
}

NEWS_GET_REDDIT_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_get_reddit",
        "description": "Get top posts from a Reddit subreddit. Great for news, discussions, and community content.",
        "parameters": {
            "type": "object",
            "properties": {
                "subreddit": {
                    "type": "string",
                    "description": "Subreddit name (e.g., worldnews, technology, science)",
                    "default": "worldnews",
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of posts (default: 15)",
                    "default": 15,
                },
                "timeframe": {
                    "type": "string",
                    "description": "Time period (hour, day, week, month, year, all)",
                    "default": "day",
                    "enum": ["hour", "day", "week", "month", "year", "all"],
                },
            },
            "required": [],
        },
    },
}

NEWS_SEARCH_REDDIT_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_search_reddit",
        "description": "Search Reddit for posts matching a query across all subreddits.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query",
                },
                "subreddit": {
                    "type": "string",
                    "description": "Subreddit to search (default: all)",
                    "default": "all",
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of results (default: 15)",
                    "default": 15,
                },
            },
            "required": ["query"],
        },
    },
}

NEWS_AGGREGATE_SCHEMA = {
    "type": "function",
    "function": {
        "name": "news_aggregate",
        "description": "Search for news from multiple sources (Hacker News, Reddit) in one call. Best for comprehensive news coverage.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query",
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum results per source (default: 10)",
                    "default": 10,
                },
            },
            "required": ["query"],
        },
    },
}