Switch to context-based tool calling (no API tool limit)
Instead of passing tools to the OpenRouter API (limited to 10 tools):
- Tool descriptions are now embedded in the system prompt
- LLM outputs tool calls as JSON: {"tool_call": {"name": "...", "arguments": {...}}}
- We parse the response, execute tools, and feed results back
- Supports all 33 tools without hitting the API limit
Changes:
- Added _build_tool_descriptions() for tool docs in prompt
- Added _parse_tool_call() to extract tool requests from LLM output
- Added _clean_tool_syntax() to remove tool JSON from responses
- Rewrote generate_response() for context-based approach
- Updated system prompt with tool usage instructions
This commit is contained in:
parent
ac0eff1cdd
commit
6eb18ce7f3
245
main.py
245
main.py
@ -661,37 +661,55 @@ def build_enhanced_messages(
|
|||||||
context: str,
|
context: str,
|
||||||
sources: list[str],
|
sources: list[str],
|
||||||
download_info: dict = None,
|
download_info: dict = None,
|
||||||
|
tool_results: list[dict] = None,
|
||||||
) -> list[ChatMessage]:
|
) -> list[ChatMessage]:
|
||||||
"""Build enhanced messages with RAG context."""
|
"""Build enhanced messages with RAG context."""
|
||||||
enhanced = []
|
enhanced = []
|
||||||
|
|
||||||
|
# Build tool descriptions for context
|
||||||
|
tool_descriptions = _build_tool_descriptions()
|
||||||
|
|
||||||
# Add system message with RAG context and tool instructions
|
# Add system message with RAG context and tool instructions
|
||||||
system_content = (
|
system_content = f"""You are a helpful AI assistant with access to real-time data through various tools.
|
||||||
"You are a helpful AI assistant with access to real-time data through various tools. "
|
|
||||||
"You MUST use these tools to get current information when the user asks about:\n"
|
## AVAILABLE TOOLS
|
||||||
"- Stocks, crypto, or financial data → use finance_get_stock_info, finance_get_crypto_price, etc.\n"
|
{tool_descriptions}
|
||||||
"- Weather → use weather_get_current, weather_get_forecast\n"
|
|
||||||
"- News → use news_search_hackernews, news_get_reddit, news_aggregate\n"
|
## HOW TO USE TOOLS
|
||||||
"- Medical/health topics → use medical_search_pubmed, medical_search_fda\n"
|
When you need to use a tool, output a JSON block in this EXACT format:
|
||||||
"- Scientific papers → use science_search_arxiv, science_search_semantic_scholar\n"
|
```json
|
||||||
"- General web search → use web_search, web_search_and_fetch\n"
|
{{"tool_call": {{"name": "tool_name", "arguments": {{"arg1": "value1"}}}}}}
|
||||||
"- Wikipedia → use wikipedia_search, wikipedia_get_article\n\n"
|
```
|
||||||
"IMPORTANT: Always use tools to get CURRENT data. Do not say you cannot access real-time data. "
|
|
||||||
"When asked about stock prices, crypto prices, weather, or news, you MUST call the appropriate tool. "
|
For example, to get stock info for AAPL:
|
||||||
"Be concise and factual. Report the exact data returned by tools."
|
```json
|
||||||
)
|
{{"tool_call": {{"name": "finance_get_stock_info", "arguments": {{"symbol": "AAPL"}}}}}}
|
||||||
|
```
|
||||||
|
|
||||||
|
## IMPORTANT RULES
|
||||||
|
1. ALWAYS use tools to get CURRENT data - do NOT say you cannot access real-time data
|
||||||
|
2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool
|
||||||
|
3. Output ONLY the JSON tool_call block when you need to use a tool
|
||||||
|
4. After receiving tool results, provide a helpful response based on the data
|
||||||
|
5. Be concise and factual - report exact data from tools
|
||||||
|
"""
|
||||||
|
|
||||||
if download_info and download_info.get("downloaded"):
|
if download_info and download_info.get("downloaded"):
|
||||||
system_content += f"\n\n--- Website Access ---\n"
|
system_content += f"\n\n--- Website Access ---\n"
|
||||||
system_content += f"I have successfully downloaded and analyzed the website: {download_info.get('url')}\n"
|
system_content += f"Downloaded website: {download_info.get('url')}\n"
|
||||||
system_content += f"Processed {download_info.get('pages')} pages into {download_info.get('chunks')} text chunks.\n"
|
system_content += f"Pages: {download_info.get('pages')}, Chunks: {download_info.get('chunks')}\n"
|
||||||
system_content += "The context below contains the actual content from this website. Use it to answer the user's question."
|
|
||||||
|
|
||||||
if context:
|
if context:
|
||||||
system_content += f"\n\n--- Relevant Context from Knowledge Base ---\n{context}\n"
|
system_content += f"\n\n--- Relevant Context from Knowledge Base ---\n{context}\n"
|
||||||
if sources:
|
if sources:
|
||||||
system_content += f"\n--- Sources ---\n" + "\n".join(f"- {s}" for s in sources[:10])
|
system_content += f"\n--- Sources ---\n" + "\n".join(f"- {s}" for s in sources[:10])
|
||||||
|
|
||||||
|
# Add previous tool results if any
|
||||||
|
if tool_results:
|
||||||
|
system_content += "\n\n--- PREVIOUS TOOL RESULTS ---\n"
|
||||||
|
for tr in tool_results:
|
||||||
|
system_content += f"\nTool: {tr['name']}\nResult: {json.dumps(tr['result'], indent=2)}\n"
|
||||||
|
|
||||||
enhanced.append(ChatMessage(role="system", content=system_content))
|
enhanced.append(ChatMessage(role="system", content=system_content))
|
||||||
|
|
||||||
# Add conversation history (excluding old system messages)
|
# Add conversation history (excluding old system messages)
|
||||||
@ -702,12 +720,74 @@ def build_enhanced_messages(
|
|||||||
return enhanced
|
return enhanced
|
||||||
|
|
||||||
|
|
||||||
|
def _build_tool_descriptions() -> str:
    """Render every registered tool as one bullet line for the system prompt.

    Each line has the form ``- name(param: type (required), ...): description``.
    Descriptions are cut to their first 100 characters; a tool without
    parameters is rendered with ``none`` as its parameter list.

    Returns:
        The newline-joined bullet list, or ``"No tools available."`` when
        ``state.tool_manager`` is falsy.
    """
    manager = state.tool_manager
    if not manager:
        return "No tools available."

    def _render(tool_name, tool_schema):
        # One bullet line for a single tool schema.
        spec = tool_schema.get("function", {})
        summary = spec.get("description", "")[:100]  # keep the prompt compact
        properties = spec.get("parameters", {}).get("properties", {})
        required_names = spec.get("parameters", {}).get("required", [])

        rendered = [
            f"{arg}: {meta.get('type', 'any')}"
            + (" (required)" if arg in required_names else "")
            for arg, meta in properties.items()
        ]
        signature = ", ".join(rendered) if rendered else "none"
        return f"- {tool_name}({signature}): {summary}"

    return "\n".join(
        _render(tool_name, tool_schema)
        for tool_name, tool_schema in manager._schemas.items()
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_tool_call(content: str) -> Optional[dict]:
|
||||||
|
"""Parse a tool call from LLM response content."""
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Look for JSON tool_call in the response
|
||||||
|
# Pattern 1: ```json {"tool_call": ...} ```
|
||||||
|
json_match = re.search(r'```json\s*(\{.*?"tool_call".*?\})\s*```', content, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
try:
|
||||||
|
data = json.loads(json_match.group(1))
|
||||||
|
return data.get("tool_call")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Pattern 2: {"tool_call": {...}} anywhere in response
|
||||||
|
json_match = re.search(r'\{"tool_call":\s*\{[^}]+\}\s*\}', content)
|
||||||
|
if json_match:
|
||||||
|
try:
|
||||||
|
data = json.loads(json_match.group(0))
|
||||||
|
return data.get("tool_call")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Pattern 3: Look for tool name pattern like [USE: tool_name args]
|
||||||
|
bracket_match = re.search(r'\[USE:\s*(\w+)\s*(?:args:\s*(\{.*?\}))?\s*\]', content, re.DOTALL)
|
||||||
|
if bracket_match:
|
||||||
|
name = bracket_match.group(1)
|
||||||
|
args_str = bracket_match.group(2) or "{}"
|
||||||
|
try:
|
||||||
|
args = json.loads(args_str)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
args = {}
|
||||||
|
return {"name": name, "arguments": args}
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
async def generate_response(
|
async def generate_response(
|
||||||
messages: list[ChatMessage],
|
messages: list[ChatMessage],
|
||||||
temperature: float = 0.7,
|
temperature: float = 0.7,
|
||||||
max_tokens: int = 4096,
|
max_tokens: int = 4096,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Generate response using upstream LLM via OpenRouter with tool calling support."""
|
"""Generate response using upstream LLM via OpenRouter with context-based tool calling."""
|
||||||
if not state.llm_client:
|
if not state.llm_client:
|
||||||
# Mock response for testing
|
# Mock response for testing
|
||||||
user_msg = ""
|
user_msg = ""
|
||||||
@ -724,94 +804,87 @@ async def generate_response(
|
|||||||
if m.content:
|
if m.content:
|
||||||
messages_dict.append({"role": m.role, "content": m.content})
|
messages_dict.append({"role": m.role, "content": m.content})
|
||||||
|
|
||||||
# Get available tools
|
# Tool calling loop - NO tools passed to API, tools are in system prompt
|
||||||
tools = None
|
|
||||||
if state.tool_manager and config.ENABLE_TOOLS:
|
|
||||||
tools = state.tool_manager.get_all_schemas()
|
|
||||||
log.info(f"Passing {len(tools)} tools to LLM")
|
|
||||||
|
|
||||||
# Tool calling loop
|
|
||||||
max_iterations = config.MAX_TOOL_ITERATIONS
|
max_iterations = config.MAX_TOOL_ITERATIONS
|
||||||
iteration = 0
|
iteration = 0
|
||||||
|
tool_results = []
|
||||||
|
|
||||||
while iteration < max_iterations:
|
while iteration < max_iterations:
|
||||||
iteration += 1
|
iteration += 1
|
||||||
log.info(f"LLM call iteration {iteration}")
|
log.info(f"LLM call iteration {iteration}")
|
||||||
|
|
||||||
# Call LLM with tools
|
# Call LLM WITHOUT tools parameter - tools are in system prompt
|
||||||
response = await state.llm_client.chat.completions.create(
|
response = await state.llm_client.chat.completions.create(
|
||||||
model=config.UPSTREAM_MODEL,
|
model=config.UPSTREAM_MODEL,
|
||||||
messages=messages_dict,
|
messages=messages_dict,
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
max_tokens=max_tokens,
|
max_tokens=max_tokens,
|
||||||
tools=tools,
|
# NO tools parameter - using context-based approach
|
||||||
tool_choice="auto" if tools else None,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
log.info(f"LLM response received: {response}")
|
log.info(f"LLM response received")
|
||||||
|
|
||||||
if not response.choices:
|
if not response.choices:
|
||||||
log.warning("No choices in response")
|
log.warning("No choices in response")
|
||||||
return "I apologize, but I couldn't generate a response."
|
return "I apologize, but I couldn't generate a response."
|
||||||
|
|
||||||
message = response.choices[0].message
|
message = response.choices[0].message
|
||||||
log.info(f"Message content: {message.content[:200] if message.content else 'None'}...")
|
content = message.content or ""
|
||||||
log.info(f"Tool calls: {message.tool_calls}")
|
|
||||||
|
log.info(f"Message content length: {len(content)}")
|
||||||
|
|
||||||
# Check if LLM wants to call tools
|
# Check if response contains a tool call
|
||||||
if message.tool_calls and len(message.tool_calls) > 0:
|
tool_call = _parse_tool_call(content)
|
||||||
log.info(f"LLM requested {len(message.tool_calls)} tool calls")
|
|
||||||
|
if tool_call and state.tool_manager:
|
||||||
# Add assistant message with tool calls to history
|
tool_name = tool_call.get("name")
|
||||||
|
tool_args = tool_call.get("arguments", {})
|
||||||
|
|
||||||
|
log.info(f"Parsed tool call: {tool_name}")
|
||||||
|
|
||||||
|
# Execute the tool
|
||||||
|
if isinstance(tool_args, dict):
|
||||||
|
result = state.tool_manager.execute_tool(tool_name, tool_args)
|
||||||
|
else:
|
||||||
|
result = state.tool_manager.execute_tool_from_json(tool_name, json.dumps(tool_args))
|
||||||
|
|
||||||
|
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
|
||||||
|
|
||||||
|
# Store tool result
|
||||||
|
tool_results.append({
|
||||||
|
"name": tool_name,
|
||||||
|
"result": result,
|
||||||
|
})
|
||||||
|
|
||||||
|
# Rebuild system message with tool results
|
||||||
|
# Find and update the system message
|
||||||
|
for i, msg in enumerate(messages_dict):
|
||||||
|
if msg["role"] == "system":
|
||||||
|
# Rebuild with tool results
|
||||||
|
# This is a simplified approach - in production you'd want better state management
|
||||||
|
tool_result_text = f"\n\n--- TOOL RESULT ---\nTool: {tool_name}\nResult: {json.dumps(result, indent=2)}\n\nNow provide a helpful response based on this data."
|
||||||
|
messages_dict[i]["content"] += tool_result_text
|
||||||
|
break
|
||||||
|
|
||||||
|
# Add assistant's tool call as a message
|
||||||
messages_dict.append({
|
messages_dict.append({
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"content": message.content,
|
"content": f"[Executing tool: {tool_name}]"
|
||||||
"tool_calls": [
|
|
||||||
{
|
|
||||||
"id": tc.id,
|
|
||||||
"type": "function",
|
|
||||||
"function": {
|
|
||||||
"name": tc.function.name,
|
|
||||||
"arguments": tc.function.arguments,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for tc in message.tool_calls
|
|
||||||
]
|
|
||||||
})
|
})
|
||||||
|
|
||||||
# Execute each tool call
|
# Add user prompt to continue
|
||||||
for tool_call in message.tool_calls:
|
messages_dict.append({
|
||||||
tool_name = tool_call.function.name
|
"role": "user",
|
||||||
tool_args = tool_call.function.arguments
|
"content": f"The tool {tool_name} returned the above result. Please provide your response to the original question using this data."
|
||||||
|
})
|
||||||
log.info(f"Executing tool: {tool_name}")
|
|
||||||
|
|
||||||
# Execute the tool
|
|
||||||
if state.tool_manager:
|
|
||||||
result = state.tool_manager.execute_tool_from_json(tool_name, tool_args)
|
|
||||||
else:
|
|
||||||
result = {"success": False, "error": "Tool manager not available"}
|
|
||||||
|
|
||||||
# Add tool result to messages
|
|
||||||
messages_dict.append({
|
|
||||||
"role": "tool",
|
|
||||||
"tool_call_id": tool_call.id,
|
|
||||||
"name": tool_name,
|
|
||||||
"content": json.dumps(result),
|
|
||||||
})
|
|
||||||
|
|
||||||
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
|
|
||||||
|
|
||||||
# Continue loop to get final response
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# No tool calls - return the final response
|
# No tool call found - return the response
|
||||||
if message.content:
|
# Clean up any partial tool call syntax from response
|
||||||
log.info(f"Returning final response: {message.content[:100]}...")
|
cleaned_content = _clean_tool_syntax(content)
|
||||||
return message.content
|
log.info(f"Returning final response")
|
||||||
else:
|
return cleaned_content or "I apologize, but I couldn't generate a response."
|
||||||
log.warning("No content in message, returning default")
|
|
||||||
return "I apologize, but I couldn't generate a response."
|
|
||||||
|
|
||||||
# Max iterations reached
|
# Max iterations reached
|
||||||
log.warning(f"Max iterations ({max_iterations}) reached")
|
log.warning(f"Max iterations ({max_iterations}) reached")
|
||||||
@ -824,6 +897,16 @@ async def generate_response(
|
|||||||
return f"I encountered an error: {str(e)}"
|
return f"I encountered an error: {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
|
def _clean_tool_syntax(content: str) -> str:
|
||||||
|
"""Remove tool call syntax from response if partially included."""
|
||||||
|
import re
|
||||||
|
# Remove ```json ... ``` blocks containing tool_call
|
||||||
|
cleaned = re.sub(r'```json\s*\{.*?"tool_call".*?\}\s*```', '', content, flags=re.DOTALL)
|
||||||
|
# Remove standalone tool_call JSON
|
||||||
|
cleaned = re.sub(r'\{"tool_call":\s*\{[^}]+\}\s*\}', '', cleaned)
|
||||||
|
return cleaned.strip()
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Document Management Endpoints
|
# Document Management Endpoints
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user