docrag/tools/medical_tool.py
Z User 4394e7d6f9 Add comprehensive set of free data tools for RAG
Tools added:
- Wikipedia: search, get article, get full article
- News: Hacker News, Reddit, aggregated news search
- Finance: stocks (yfinance), crypto (CoinGecko), exchange rates
- Medical: PubMed, FDA, disease data, health topics
- Weather: current, forecast, air quality (Open-Meteo)
- Science: arXiv, Semantic Scholar, DOAJ
- Web: DuckDuckGo search, instant answers, page content

All tools use completely free APIs with no authentication required.
2026-03-29 06:27:32 +00:00

509 lines
15 KiB
Python

"""
Medical/Health Tool - Search medical literature and health data
Free sources used:
- PubMed/NCBI E-utilities API (completely free, no key required for basic use)
- Disease.sh API (completely free, open disease data)
- Health.gov API (free government health data)
- OpenFDA API (free FDA data)
All APIs are free and most don't require authentication.
"""
from __future__ import annotations
import logging
from datetime import datetime
from typing import Optional
import requests
log = logging.getLogger(__name__)
# Free medical API endpoints.
# Each value is a base URL without a trailing slash; the functions in this
# module append the concrete path (e.g. "/esearch.fcgi") with an f-string.
# NCBI E-utilities base, used by the PubMed search and abstract functions.
PUBMED_EUTILS_API = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
# disease.sh base (already includes the "/v3" version prefix), used by the
# disease-statistics and COVID-19 country functions.
DISEASE_API = "https://disease.sh/v3"
# openFDA base, used by medical_search_fda ("<base>/<endpoint>.json").
OPENFDA_API = "https://api.fda.gov"
# health.gov site root; medical_get_health_topics appends the
# "/myhealthfinder/api/v3/..." path itself.
HEALTH_GOV_API = "https://health.gov"
def medical_search_pubmed(
    query: str,
    max_results: int = 10,
) -> dict:
    """
    Search PubMed for medical/health research articles.

    Runs an E-utilities ``esearch`` request to collect the matching PMIDs and
    then a single ``esummary`` request to fetch the metadata of those PMIDs.

    Args:
        query: Search query (medical terms, diseases, treatments, etc.).
            Must contain at least one non-whitespace character.
        max_results: Maximum number of results (default: 10)

    Returns:
        Dictionary with PubMed search results. On success it contains
        ``success``, ``source``, ``query``, ``results`` and ``count`` (plus
        ``total_found`` when at least one article matched, or ``message``
        when none did). On failure it contains ``success=False``, ``error``
        and ``source``. The function never raises.
    """
    # A blank term cannot match anything useful; reject it before spending a
    # network round trip on it.
    if not isinstance(query, str) or not query.strip():
        return {
            "success": False,
            "error": "query must be a non-empty string",
            "source": "pubmed",
        }
    try:
        # Step 1: resolve the query to a list of PMIDs.
        search_response = requests.get(
            f"{PUBMED_EUTILS_API}/esearch.fcgi",
            params={
                "db": "pubmed",
                "term": query,
                "retmax": max_results,
                "retmode": "json",
                "sort": "relevance",
            },
            timeout=15,
        )
        search_response.raise_for_status()
        search_result = search_response.json().get("esearchresult", {})
        id_list = search_result.get("idlist", [])
        if not id_list:
            return {
                "success": True,
                "source": "pubmed",
                "query": query,
                "results": [],
                "count": 0,
                "message": "No articles found for this query",
            }

        # Step 2: fetch the summaries for all PMIDs in one request.
        fetch_response = requests.get(
            f"{PUBMED_EUTILS_API}/esummary.fcgi",
            params={
                "db": "pubmed",
                "id": ",".join(id_list),
                "retmode": "json",
            },
            timeout=15,
        )
        fetch_response.raise_for_status()
        summaries = fetch_response.json().get("result", {})

        results = []
        for article_id in id_list:
            article = summaries.get(article_id, {})
            # Skip PMIDs the summary call did not return or flagged as errors.
            if not article or "error" in article:
                continue
            # Prefer the dedicated DOI entry of "articleids"; "elocationid"
            # is a prefixed string ("doi: ..." / "pii: ...") and is only kept
            # as the fallback the original code relied on.
            doi = next(
                (
                    entry.get("value", "")
                    for entry in article.get("articleids") or []
                    if isinstance(entry, dict) and entry.get("idtype") == "doi"
                ),
                "",
            ) or article.get("elocationid", "")
            results.append({
                "pmid": article_id,
                "title": article.get("title", ""),
                "authors": [a.get("name", "") for a in article.get("authors") or []],
                "journal": article.get("fulljournalname", article.get("source", "")),
                "pub_date": article.get("pubdate", ""),
                "doi": doi,
                "url": f"https://pubmed.ncbi.nlm.nih.gov/{article_id}/",
                # Summaries carry no "abstract" key; availability is signalled
                # through the "attributes" list instead.
                "abstract_available": (
                    "abstract" in article
                    or "Has Abstract" in (article.get("attributes") or [])
                ),
            })
        return {
            "success": True,
            "source": "pubmed",
            "query": query,
            "results": results,
            "count": len(results),
            "total_found": int(search_result.get("count", 0)),
        }
    except Exception as e:
        # Tool boundary: report every failure as data instead of raising.
        log.error("PubMed search failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "pubmed",
        }
def medical_get_pubmed_abstract(
    pmid: str,
) -> dict:
    """
    Get the abstract of a PubMed article.

    Calls E-utilities ``efetch`` with ``rettype=abstract`` and
    ``retmode=text`` and returns the stripped response body.

    Args:
        pmid: PubMed ID. Surrounding whitespace is ignored and a numeric
            value is converted to its string form.

    Returns:
        Dictionary with article abstract: ``success``, ``source``, ``pmid``,
        ``abstract`` and ``url`` on success; ``success=False``, ``error``,
        ``source`` and ``pmid`` on failure. The function never raises.
    """
    # Normalise first so that an integer PMID works and so that the echoed
    # "pmid" and the URL are built from the same value.
    pmid = "" if pmid is None else str(pmid).strip()
    if not pmid:
        return {
            "success": False,
            "error": "pmid must be a non-empty PubMed ID",
            "source": "pubmed",
            "pmid": pmid,
        }
    try:
        response = requests.get(
            f"{PUBMED_EUTILS_API}/efetch.fcgi",
            params={
                "db": "pubmed",
                "id": pmid,
                "rettype": "abstract",
                "retmode": "text",
            },
            timeout=15,
        )
        response.raise_for_status()
        abstract_text = response.text.strip()
        if not abstract_text:
            # An empty body means nothing was found for this ID; reporting
            # that as success would hand the caller an empty abstract.
            return {
                "success": False,
                "error": "No record text returned for this PMID",
                "source": "pubmed",
                "pmid": pmid,
            }
        return {
            "success": True,
            "source": "pubmed",
            "pmid": pmid,
            "abstract": abstract_text,
            "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
        }
    except Exception as e:
        # Tool boundary: report every failure as data instead of raising.
        log.error("PubMed abstract fetch failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "pubmed",
            "pmid": pmid,
        }
def medical_get_disease_data(
    disease: str = "covid",
) -> dict:
    """
    Get current disease statistics from Disease.sh API.

    Args:
        disease: Disease type (covid, influenza, or all). Any other value is
            treated as a country name/ISO code and looked up in the COVID-19
            per-country data. Matching is case-insensitive and ignores
            surrounding whitespace.

    Returns:
        Dictionary with disease statistics: ``success``, ``source``,
        ``disease`` (normalised input), ``data`` (raw API payload) and
        ``timestamp`` on success; ``success=False``, ``error`` and ``source``
        on failure. The function never raises.
    """
    from urllib.parse import quote

    key = disease.strip().lower() if isinstance(disease, str) else ""
    if not key:
        # An empty value would otherwise hit ".../covid-19/countries/" and
        # return the data of every country.
        return {
            "success": False,
            "error": "disease must be a non-empty string",
            "source": "disease.sh",
        }
    try:
        if key in ("covid", "covid-19", "coronavirus"):
            url = f"{DISEASE_API}/covid-19/all"
        elif key in ("influenza", "flu"):
            # The influenza data lives under named CDC datasets
            # (ILINet / USPHL / USCL); the bare ".../influenza/cdc" path is
            # not a route. ILINet is the general illness-surveillance set.
            url = f"{DISEASE_API}/influenza/cdc/ILINet"
        elif key == "all":
            # NOTE(review): kept as in the original; confirm that "/all"
            # exists under the v3 prefix (it may need to be "/covid-19/all").
            url = f"{DISEASE_API}/all"
        else:
            # Fall back to COVID-19 country data. The value comes from the
            # caller, so percent-encode it before placing it in the path.
            url = f"{DISEASE_API}/covid-19/countries/{quote(key, safe='')}"
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        return {
            "success": True,
            "source": "disease.sh",
            "disease": key,
            "data": response.json(),
            "timestamp": datetime.now().isoformat(),
        }
    except Exception as e:
        # Tool boundary: report every failure as data instead of raising.
        log.error("Disease data fetch failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "disease.sh",
        }
def medical_get_covid_country(
    country: str = "usa",
) -> dict:
    """
    Get COVID-19 statistics for a specific country.

    Args:
        country: Country name or ISO code (e.g., usa, uk, germany, china).
            Surrounding whitespace is ignored.

    Returns:
        Dictionary with country COVID-19 data (cases, deaths, recovered,
        tests, per-million figures, population, continent and the ``updated``
        time as an ISO string, or None when the API gives no timestamp).
        On failure: ``success=False``, ``error`` and ``source``. The function
        never raises.
    """
    from urllib.parse import quote

    name = country.strip() if isinstance(country, str) else ""
    if not name:
        return {
            "success": False,
            "error": "country must be a non-empty string",
            "source": "disease.sh",
        }
    try:
        # The value comes from the caller, so percent-encode it before
        # placing it in the URL path.
        url = f"{DISEASE_API}/covid-19/countries/{quote(name, safe='')}"
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        if not isinstance(data, dict):
            # A non-object payload (e.g. a list) cannot be mapped onto the
            # single-country fields below; fail with a clear message.
            raise ValueError(f"Unexpected response format for country {name!r}")
        updated_ms = data.get("updated")
        return {
            "success": True,
            "source": "disease.sh",
            "country": data.get("country", name),
            "cases": data.get("cases"),
            "today_cases": data.get("todayCases"),
            "deaths": data.get("deaths"),
            "today_deaths": data.get("todayDeaths"),
            "recovered": data.get("recovered"),
            "active": data.get("active"),
            "critical": data.get("critical"),
            "cases_per_million": data.get("casesPerOneMillion"),
            "deaths_per_million": data.get("deathsPerOneMillion"),
            "tests": data.get("tests"),
            "tests_per_million": data.get("testsPerOneMillion"),
            "population": data.get("population"),
            "continent": data.get("continent"),
            # "updated" is divided by 1000 because it is treated as epoch
            # milliseconds, as in the original implementation.
            "updated": (
                datetime.fromtimestamp(updated_ms / 1000).isoformat()
                if updated_ms
                else None
            ),
        }
    except Exception as e:
        # Tool boundary: report every failure as data instead of raising.
        log.error("COVID country data fetch failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "disease.sh",
        }
def medical_search_fda(
    query: str,
    database: str = "drug",
    limit: int = 10,
) -> dict:
    """
    Search FDA drug, device, or food databases.

    Args:
        query: Search query. Must contain at least one non-whitespace
            character.
        database: Database to search (drug, device, food, other).
            Case-insensitive; an unrecognised name falls back to "drug".
        limit: Maximum results (default: 10)

    Returns:
        Dictionary with FDA search results: ``success``, ``source``,
        ``database`` (the database actually searched), ``query``, ``results``
        and ``count``. Drug results are condensed to brand/generic name,
        manufacturer, purpose, indications and warnings (the last two cut to
        500 characters); other databases return the raw records. A search
        that matches nothing yields an empty, successful result. On failure:
        ``success=False``, ``error`` and ``source``. The function never
        raises.
    """
    if not isinstance(query, str) or not query.strip():
        return {
            "success": False,
            "error": "query must be a non-empty string",
            "source": "openfda",
        }
    try:
        # Map database names to FDA endpoints
        db_map = {
            "drug": "drug/label",
            "device": "device/510k",
            "food": "food/enforcement",
            "other": "other/substance",
        }
        # Resolve the effective database once so that the endpoint, the
        # result formatting and the reported name always agree.
        requested = database.lower()
        db_key = requested if requested in db_map else "drug"
        url = f"{OPENFDA_API}/{db_map[db_key]}.json"
        response = requests.get(
            url,
            params={"search": query, "limit": limit},
            timeout=15,
        )
        if response.status_code == 404:
            # The endpoints above are fixed, so a 404 here is openFDA's
            # "no matches found" answer rather than a bad URL.
            return {
                "success": True,
                "source": "openfda",
                "database": db_key,
                "query": query,
                "results": [],
                "count": 0,
            }
        response.raise_for_status()
        items = response.json().get("results", [])
        if db_key == "drug":
            results = [_fda_drug_summary(item) for item in items]
        else:
            results = list(items)
        return {
            "success": True,
            "source": "openfda",
            "database": db_key,
            "query": query,
            "results": results,
            "count": len(results),
        }
    except Exception as e:
        # Tool boundary: report every failure as data instead of raising.
        log.error("FDA search failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "openfda",
        }


def _fda_first_text(values, max_len=None):
    """Return the first entry of a list-valued label field as text ("" if absent or empty)."""
    if not values:
        return ""
    first = values[0] if isinstance(values, (list, tuple)) else values
    text = first if isinstance(first, str) else str(first)
    return text if max_len is None else text[:max_len]


def _fda_drug_summary(item):
    """Condense one drug-label record to the fields exposed by medical_search_fda."""
    openfda = item.get("openfda") or {}
    return {
        "brand_name": _fda_first_text(openfda.get("brand_name")),
        "generic_name": _fda_first_text(openfda.get("generic_name")),
        "manufacturer": _fda_first_text(openfda.get("manufacturer_name")),
        "purpose": _fda_first_text(item.get("purpose")),
        "indications": _fda_first_text(item.get("indications_and_usage"), 500),
        "warnings": _fda_first_text(item.get("warnings"), 500),
    }
def medical_get_health_topics(
    topic: Optional[str] = None,
    limit: int = 10,
) -> dict:
    """
    Get health topics from Health.gov.

    Args:
        topic: Health topic to search (optional). Sent to the MyHealthfinder
            topic search as its keyword filter.
        limit: Maximum results (default: 10). A value of zero or less yields
            an empty result without contacting the API.

    Returns:
        Dictionary with health topics: ``success``, ``source``, ``topic``,
        ``results`` (title, url, image_url, image_alt, categories and the
        first 500 characters of the first section's content) and ``count``.
        On failure: ``success=False``, ``error`` and ``source``. The function
        never raises.
    """
    if isinstance(limit, int) and limit <= 0:
        # Nothing can be returned; also avoids the negative-slice surprise
        # (items[:-1] would return all but the last topic).
        return {
            "success": True,
            "source": "health.gov",
            "topic": topic,
            "results": [],
            "count": 0,
        }
    try:
        url = f"{HEALTH_GOV_API}/myhealthfinder/api/v3/topicsearch.json"
        params = {"lang": "en"}
        if topic:
            # The topic search filters by "keyword"; a "topic" parameter is
            # not part of that API, so the search term was being ignored.
            params["keyword"] = topic
        else:
            # NOTE(review): "pageSize" is kept from the original; it does not
            # appear to be a documented topicsearch parameter. Results are
            # truncated locally below in any case.
            params["pageSize"] = limit
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
        # Tolerate null containers and a single object where a list is
        # expected, instead of failing the whole call.
        resources = (data.get("Result") or {}).get("Resources") or {}
        topics = resources.get("Resource") or []
        if isinstance(topics, dict):
            topics = [topics]
        results = [
            {
                "title": item.get("Title", ""),
                "url": item.get("AccessibleVersion", item.get("MyHealthfinder", "")),
                "image_url": item.get("ImageAltUrl", ""),
                "image_alt": item.get("ImageAltText", ""),
                "categories": item.get("Categories", ""),
                "content": _health_topic_excerpt(item),
            }
            for item in topics[:limit]
        ]
        return {
            "success": True,
            "source": "health.gov",
            "topic": topic,
            "results": results,
            "count": len(results),
        }
    except Exception as e:
        # Tool boundary: report every failure as data instead of raising.
        log.error("Health topics fetch failed: %s", e)
        return {
            "success": False,
            "error": str(e),
            "source": "health.gov",
        }


def _health_topic_excerpt(item, max_len=500):
    """Return up to max_len characters of the first section's content ("" if there is none)."""
    sections = (item.get("Sections") or {}).get("section") or []
    if isinstance(sections, dict):
        sections = [sections]
    first = sections[0] if sections else {}
    content = first.get("Content") if isinstance(first, dict) else None
    return str(content)[:max_len] if content else ""
# Tool schemas for OpenAI function calling
# Function-calling schema for medical_search_pubmed: "query" is required,
# "max_results" is optional and defaults to 10.
MEDICAL_SEARCH_PUBMED_SCHEMA = dict(
    type="function",
    function=dict(
        name="medical_search_pubmed",
        description="Search PubMed for medical and health research articles. Use for scientific medical literature.",
        parameters=dict(
            type="object",
            properties=dict(
                query=dict(
                    type="string",
                    description="Medical search query (disease, treatment, drug, symptom)",
                ),
                max_results=dict(
                    type="integer",
                    description="Maximum number of results (default: 10)",
                    default=10,
                ),
            ),
            required=["query"],
        ),
    ),
)
# Function-calling schema for medical_get_pubmed_abstract: a single required
# string parameter, "pmid".
MEDICAL_GET_PUBMED_ABSTRACT_SCHEMA = dict(
    type="function",
    function=dict(
        name="medical_get_pubmed_abstract",
        description="Get the full abstract of a PubMed article. Use after medical_search_pubmed to get detailed content.",
        parameters=dict(
            type="object",
            properties=dict(
                pmid=dict(
                    type="string",
                    description="PubMed ID from search results",
                ),
            ),
            required=["pmid"],
        ),
    ),
)
# Function-calling schema for medical_get_disease_data: one optional string
# parameter, "disease", defaulting to "covid".
MEDICAL_GET_DISEASE_DATA_SCHEMA = dict(
    type="function",
    function=dict(
        name="medical_get_disease_data",
        description="Get current disease statistics (COVID-19, influenza). Use for outbreak data and statistics.",
        parameters=dict(
            type="object",
            properties=dict(
                disease=dict(
                    type="string",
                    description="Disease type (covid, influenza, all)",
                    default="covid",
                ),
            ),
            required=[],
        ),
    ),
)
# Function-calling schema for medical_get_covid_country. "country" is
# optional; its default is declared here so the schema matches the function
# signature (country="usa"), as the other schemas do for their optional
# parameters.
MEDICAL_GET_COVID_COUNTRY_SCHEMA = {
    "type": "function",
    "function": {
        "name": "medical_get_covid_country",
        "description": "Get COVID-19 statistics for a specific country. Use for country-specific pandemic data.",
        "parameters": {
            "type": "object",
            "properties": {
                "country": {
                    "type": "string",
                    "description": "Country name or ISO code (e.g., usa, uk, germany)",
                    "default": "usa",
                },
            },
            "required": [],
        },
    },
}
# Function-calling schema for medical_search_fda. "query" is required;
# "database" and "limit" are optional. The enum lists every database name
# the function maps to an endpoint, including "other".
MEDICAL_SEARCH_FDA_SCHEMA = {
    "type": "function",
    "function": {
        "name": "medical_search_fda",
        "description": "Search FDA databases for drug information, device approvals, and food safety. Use for medication info.",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query (drug name, ingredient, etc.)",
                },
                "database": {
                    "type": "string",
                    "description": "Database to search (drug, device, food, other)",
                    "default": "drug",
                    "enum": ["drug", "device", "food", "other"],
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum results (default: 10)",
                    "default": 10,
                },
            },
            "required": ["query"],
        },
    },
}
# Function-calling schema for medical_get_health_topics: both parameters are
# optional; "limit" defaults to 10.
MEDICAL_GET_HEALTH_TOPICS_SCHEMA = dict(
    type="function",
    function=dict(
        name="medical_get_health_topics",
        description="Get health information and topics from Health.gov. Use for general health advice and wellness topics.",
        parameters=dict(
            type="object",
            properties=dict(
                topic=dict(
                    type="string",
                    description="Health topic to search (optional)",
                ),
                limit=dict(
                    type="integer",
                    description="Maximum results (default: 10)",
                    default=10,
                ),
            ),
            required=[],
        ),
    ),
)