Implement tool calling loop for LLM

- Pass all registered tools to LLM during chat completion
- Handle tool_calls from LLM response
- Execute tools and feed results back to LLM
- Loop until LLM returns final response
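- Update the system prompt to list the available tools and require their use for real-time data
- Update streaming: when tools are enabled, resolve tool calls with a non-streaming request and emit the final response as a single chunk
- Increase the default MAX_TOOL_ITERATIONS from 3 to 5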
This commit is contained in:
Z User 2026-03-29 16:07:56 +00:00
parent 4394e7d6f9
commit b811162f78
19 changed files with 153 additions and 51 deletions

0
.gitignore vendored Normal file → Executable file
View File

0
README.md Normal file → Executable file
View File

204
main.py Normal file → Executable file
View File

@ -91,7 +91,7 @@ class Config:
# Tool settings # Tool settings
ENABLE_TOOLS: bool = os.getenv("ENABLE_TOOLS", "true").lower() == "true" ENABLE_TOOLS: bool = os.getenv("ENABLE_TOOLS", "true").lower() == "true"
MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "3")) MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "5"))
config = Config() config = Config()
@ -526,29 +526,51 @@ async def stream_chat_completion(
try: try:
if state.llm_client: if state.llm_client:
# Use OpenRouter with streaming # For streaming with tools, we need to handle tool calls first
stream = await state.llm_client.chat.completions.create( # Then stream the final response
model=config.UPSTREAM_MODEL, if state.tool_manager and config.ENABLE_TOOLS:
messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content], # Use non-streaming for tool calls, then stream the result
temperature=request.temperature or 0.7, response_content = await generate_response(
max_tokens=request.max_tokens or 4096, enhanced_messages,
stream=True, temperature=request.temperature or 0.7,
) max_tokens=request.max_tokens or 4096,
)
# Stream the final response as a single chunk
yield f"data: {json.dumps({
'id': request_id,
'object': 'chat.completion.chunk',
'created': created,
'model': config.MODEL_NAME,
'choices': [{
'index': 0,
'delta': {'content': response_content},
'finish_reason': None
}]
})}\n\n"
else:
# No tools - use regular streaming
stream = await state.llm_client.chat.completions.create(
model=config.UPSTREAM_MODEL,
messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
temperature=request.temperature or 0.7,
max_tokens=request.max_tokens or 4096,
stream=True,
)
async for chunk in stream: async for chunk in stream:
if chunk.choices and chunk.choices[0].delta.content: if chunk.choices and chunk.choices[0].delta.content:
content = chunk.choices[0].delta.content content = chunk.choices[0].delta.content
yield f"data: {json.dumps({ yield f"data: {json.dumps({
'id': request_id, 'id': request_id,
'object': 'chat.completion.chunk', 'object': 'chat.completion.chunk',
'created': created, 'created': created,
'model': config.MODEL_NAME, 'model': config.MODEL_NAME,
'choices': [{ 'choices': [{
'index': 0, 'index': 0,
'delta': {'content': content}, 'delta': {'content': content},
'finish_reason': None 'finish_reason': None
}] }]
})}\n\n" })}\n\n"
# Send final chunk # Send final chunk
yield f"data: {json.dumps({ yield f"data: {json.dumps({
@ -615,13 +637,20 @@ def build_enhanced_messages(
"""Build enhanced messages with RAG context.""" """Build enhanced messages with RAG context."""
enhanced = [] enhanced = []
# Add system message with RAG context # Add system message with RAG context and tool instructions
system_content = ( system_content = (
"You are a helpful AI assistant with the ability to access and analyze websites on-demand. " "You are a helpful AI assistant with access to real-time data through various tools. "
"When a user asks about a website, you can download and analyze its content directly. " "You MUST use these tools to get current information when the user asks about:\n"
"Use the provided context from the knowledge base to give accurate and helpful responses. " "- Stocks, crypto, or financial data → use finance_get_stock_info, finance_get_crypto_price, etc.\n"
"If context from a website is provided, use it to answer the user's question directly with specific information. " "- Weather → use weather_get_current, weather_get_forecast\n"
"Be helpful, detailed, and provide the specific information the user is asking for (headlines, summaries, etc.)." "- News → use news_search_hackernews, news_get_reddit, news_aggregate\n"
"- Medical/health topics → use medical_search_pubmed, medical_search_fda\n"
"- Scientific papers → use science_search_arxiv, science_search_semantic_scholar\n"
"- General web search → use web_search, web_search_and_fetch\n"
"- Wikipedia → use wikipedia_search, wikipedia_get_article\n\n"
"IMPORTANT: Always use tools to get CURRENT data. Do not say you cannot access real-time data. "
"When asked about stock prices, crypto prices, weather, or news, you MUST call the appropriate tool. "
"Be concise and factual. Report the exact data returned by tools."
) )
if download_info and download_info.get("downloaded"): if download_info and download_info.get("downloaded"):
@ -650,28 +679,8 @@ async def generate_response(
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int = 4096, max_tokens: int = 4096,
) -> str: ) -> str:
"""Generate response using upstream LLM via OpenRouter.""" """Generate response using upstream LLM via OpenRouter with tool calling support."""
if state.llm_client: if not state.llm_client:
try:
response = await state.llm_client.chat.completions.create(
model=config.UPSTREAM_MODEL,
messages=[{"role": m.role, "content": m.content} for m in messages if m.content],
temperature=temperature,
max_tokens=max_tokens,
)
# Extract content from response
if response.choices:
message_content = response.choices[0].message.content
return message_content or "I apologize, but I couldn't generate a response."
return "I apologize, but I couldn't generate a response."
except Exception as e:
log.error(f"OpenRouter LLM call failed: {e}")
return f"I encountered an error: {str(e)}"
else:
# Mock response for testing # Mock response for testing
user_msg = "" user_msg = ""
for msg in reversed(messages): for msg in reversed(messages):
@ -680,6 +689,99 @@ async def generate_response(
break break
return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality." return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality."
try:
# Convert messages to dict format
messages_dict = []
for m in messages:
if m.content:
messages_dict.append({"role": m.role, "content": m.content})
# Get available tools
tools = None
if state.tool_manager and config.ENABLE_TOOLS:
tools = state.tool_manager.get_all_schemas()
log.info(f"Passing {len(tools)} tools to LLM")
# Tool calling loop
max_iterations = config.MAX_TOOL_ITERATIONS
iteration = 0
while iteration < max_iterations:
iteration += 1
log.info(f"LLM call iteration {iteration}")
# Call LLM with tools
response = await state.llm_client.chat.completions.create(
model=config.UPSTREAM_MODEL,
messages=messages_dict,
temperature=temperature,
max_tokens=max_tokens,
tools=tools,
tool_choice="auto" if tools else None,
)
if not response.choices:
return "I apologize, but I couldn't generate a response."
message = response.choices[0].message
# Check if LLM wants to call tools
if message.tool_calls:
log.info(f"LLM requested {len(message.tool_calls)} tool calls")
# Add assistant message with tool calls to history
messages_dict.append({
"role": "assistant",
"content": message.content,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments,
}
}
for tc in message.tool_calls
]
})
# Execute each tool call
for tool_call in message.tool_calls:
tool_name = tool_call.function.name
tool_args = tool_call.function.arguments
log.info(f"Executing tool: {tool_name}")
# Execute the tool
if state.tool_manager:
result = state.tool_manager.execute_tool_from_json(tool_name, tool_args)
else:
result = {"success": False, "error": "Tool manager not available"}
# Add tool result to messages
messages_dict.append({
"role": "tool",
"tool_call_id": tool_call.id,
"name": tool_name,
"content": json.dumps(result),
})
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
# Continue loop to get final response
continue
# No tool calls - return the final response
return message.content or "I apologize, but I couldn't generate a response."
# Max iterations reached
return "I reached the maximum number of tool calls. Please try a more specific question."
except Exception as e:
log.error(f"OpenRouter LLM call failed: {e}")
return f"I encountered an error: {str(e)}"
# ============================================================================= # =============================================================================
# Document Management Endpoints # Document Management Endpoints

0
rag/__init__.py Normal file → Executable file
View File

0
rag/document_processor.py Normal file → Executable file
View File

0
rag/retriever.py Normal file → Executable file
View File

0
rag/vector_store.py Normal file → Executable file
View File

0
requirements.txt Normal file → Executable file
View File

0
tools.md Normal file → Executable file
View File

0
tools/__init__.py Normal file → Executable file
View File

0
tools/finance_tool.py Normal file → Executable file
View File

0
tools/medical_tool.py Normal file → Executable file
View File

0
tools/news_tool.py Normal file → Executable file
View File

0
tools/science_tool.py Normal file → Executable file
View File

0
tools/weather_tool.py Normal file → Executable file
View File

0
tools/web_tool.py Normal file → Executable file
View File

0
tools/wikipedia_tool.py Normal file → Executable file
View File

0
website_downloader.py Normal file → Executable file
View File

0
website_downloader_tool.py Normal file → Executable file
View File