""" Web Search Tool - General web search capabilities Free sources used: - DuckDuckGo Instant Answer API (completely free) - DuckDuckGo HTML search (free, no API key) - Wikipedia API (as fallback) All completely free, no API keys required. """ from __future__ import annotations import logging from datetime import datetime from typing import Optional from urllib.parse import quote_plus, unquote_plus import requests log = logging.getLogger(__name__) # Free search endpoints DUCKDUCKGO_API = "https://api.duckduckgo.com" DUCKDUCKGO_HTML = "https://html.duckduckgo.com/html" def web_search( query: str, max_results: int = 10, ) -> dict: """ Search the web using DuckDuckGo. Args: query: Search query max_results: Maximum number of results (default: 10) Returns: Dictionary with search results """ try: # Use DuckDuckGo HTML search (free, no API key) headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" } params = {"q": query} response = requests.get( DUCKDUCKGO_HTML, params=params, headers=headers, timeout=15 ) response.raise_for_status() # Parse HTML results results = _parse_ddg_html(response.text, max_results) return { "success": True, "source": "duckduckgo", "query": query, "results": results, "count": len(results), } except Exception as e: log.error(f"Web search failed: {e}") return { "success": False, "error": str(e), "source": "duckduckgo", } def _parse_ddg_html(html: str, max_results: int) -> list: """Parse DuckDuckGo HTML results.""" from bs4 import BeautifulSoup soup = BeautifulSoup(html, "html.parser") results = [] # Find result links for result in soup.select(".result")[:max_results]: try: link_elem = result.select_one(".result__a") snippet_elem = result.select_one(".result__snippet") if link_elem: url = link_elem.get("href", "") # Extract actual URL from redirect if "uddg=" in url: url = url.split("uddg=")[-1].split("&")[0] url = unquote_plus(url) results.append({ "title": link_elem.get_text(strip=True), "url": url, "snippet": snippet_elem.get_text(strip=True) if snippet_elem else "", }) except Exception: continue return results def web_instant_answer( query: str, ) -> dict: """ Get instant answer from DuckDuckGo. Args: query: Query for instant answer Returns: Dictionary with instant answer """ try: params = { "q": query, "format": "json", "no_html": 1, "skip_disambig": 0, } response = requests.get(DUCKDUCKGO_API, params=params, timeout=10) response.raise_for_status() data = response.json() result = { "success": True, "source": "duckduckgo", "query": query, } # Abstract (main answer) if data.get("Abstract"): result["abstract"] = data.get("Abstract") result["abstract_source"] = data.get("AbstractSource") result["abstract_url"] = data.get("AbstractURL") result["image"] = data.get("Image") # Definition if data.get("Definition"): result["definition"] = data.get("Definition") result["definition_source"] = data.get("DefinitionSource") # Answer if data.get("Answer"): result["answer"] = data.get("Answer") # Related topics related = [] for topic in data.get("RelatedTopics", [])[:5]: if isinstance(topic, dict) and topic.get("Text"): related.append({ "text": topic.get("Text"), "url": topic.get("FirstURL"), }) if related: result["related_topics"] = related # Infobox if data.get("Infobox"): result["infobox"] = data.get("Infobox") return result except Exception as e: log.error(f"Instant answer failed: {e}") return { "success": False, "error": str(e), "source": "duckduckgo", } def web_get_page_content( url: str, max_length: int = 5000, ) -> dict: """ Fetch and extract text content from a web page. Args: url: URL to fetch max_length: Maximum content length (default: 5000 chars) Returns: Dictionary with page content """ try: headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" } response = requests.get(url, headers=headers, timeout=15) response.raise_for_status() # Parse and extract text from bs4 import BeautifulSoup soup = BeautifulSoup(response.text, "html.parser") # Remove script and style elements for element in soup(["script", "style", "nav", "header", "footer"]): element.decompose() # Get title title = "" if soup.title: title = soup.title.get_text(strip=True) # Get main content text = soup.get_text(separator="\n", strip=True) # Clean up whitespace lines = [line.strip() for line in text.splitlines() if line.strip()] text = "\n".join(lines) # Truncate if needed if len(text) > max_length: text = text[:max_length] + "..." return { "success": True, "source": "web", "url": url, "title": title, "content": text, "content_length": len(text), } except Exception as e: log.error(f"Page content fetch failed: {e}") return { "success": False, "error": str(e), "source": "web", "url": url, } def web_search_and_fetch( query: str, max_results: int = 3, max_content_length: int = 3000, ) -> dict: """ Search web and fetch content from top results. Args: query: Search query max_results: Number of results to fetch (default: 3) max_content_length: Max content per page (default: 3000) Returns: Dictionary with search results and fetched content """ try: # First, search search_result = web_search(query, max_results) if not search_result.get("success"): return search_result results = search_result.get("results", []) # Fetch content from each result enriched_results = [] for result in results: if result.get("url"): content = web_get_page_content(result["url"], max_content_length) result["fetched_content"] = content.get("content", "") if content.get("success") else "" enriched_results.append(result) return { "success": True, "source": "duckduckgo", "query": query, "results": enriched_results, "count": len(enriched_results), } except Exception as e: log.error(f"Search and fetch failed: {e}") return { "success": False, "error": str(e), "source": "duckduckgo", } def web_get_headers( url: str, ) -> dict: """ Get HTTP headers for a URL. Args: url: URL to check Returns: Dictionary with HTTP headers """ try: headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" } response = requests.head(url, headers=headers, timeout=10, allow_redirects=True) return { "success": True, "source": "web", "url": url, "status_code": response.status_code, "headers": dict(response.headers), "final_url": response.url, } except Exception as e: log.error(f"Header fetch failed: {e}") return { "success": False, "error": str(e), "source": "web", "url": url, } # Tool schemas for OpenAI function calling WEB_SEARCH_SCHEMA = { "type": "function", "function": { "name": "web_search", "description": "Search the web using DuckDuckGo. Returns search results with titles, URLs, and snippets. Free, no API key required.", "parameters": { "type": "object", "properties": { "query": { "type": "string", "description": "Search query", }, "max_results": { "type": "integer", "description": "Maximum number of results (default: 10)", "default": 10, }, }, "required": ["query"], }, }, } WEB_INSTANT_ANSWER_SCHEMA = { "type": "function", "function": { "name": "web_instant_answer", "description": "Get instant answer from DuckDuckGo for facts, definitions, and summaries. Good for quick facts.", "parameters": { "type": "object", "properties": { "query": { "type": "string", "description": "Query for instant answer", }, }, "required": ["query"], }, }, } WEB_GET_PAGE_CONTENT_SCHEMA = { "type": "function", "function": { "name": "web_get_page_content", "description": "Fetch and extract text content from a web page URL. Use after web_search to get full content.", "parameters": { "type": "object", "properties": { "url": { "type": "string", "description": "URL to fetch", }, "max_length": { "type": "integer", "description": "Maximum content length in characters (default: 5000)", "default": 5000, }, }, "required": ["url"], }, }, } WEB_SEARCH_AND_FETCH_SCHEMA = { "type": "function", "function": { "name": "web_search_and_fetch", "description": "Search web and automatically fetch content from top results. Best for comprehensive research.", "parameters": { "type": "object", "properties": { "query": { "type": "string", "description": "Search query", }, "max_results": { "type": "integer", "description": "Number of results to fetch (default: 3)", "default": 3, }, "max_content_length": { "type": "integer", "description": "Max content per page (default: 3000)", "default": 3000, }, }, "required": ["query"], }, }, }