Implement tool calling loop for LLM
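- Pass all registered tools to the LLM during chat completion
- Handle tool_calls in the LLM response
- Execute the requested tools and feed their results back to the LLM
- Loop until the LLM returns a final response (or the iteration limit is reached)
- Updated the system prompt to encourage tool use
- Updated streaming to handle tool calls: when tools are enabled, the tool loop runs non-streaming and the final response is emitted as a single chunk
- Increased the default MAX_TOOL_ITERATIONS from 3 to 5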
This commit is contained in:
parent
4394e7d6f9
commit
b811162f78
0
.gitignore
vendored
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
204
main.py
Normal file → Executable file
204
main.py
Normal file → Executable file
@ -91,7 +91,7 @@ class Config:
|
||||
|
||||
# Tool settings
|
||||
ENABLE_TOOLS: bool = os.getenv("ENABLE_TOOLS", "true").lower() == "true"
|
||||
MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "3"))
|
||||
MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "5"))
|
||||
|
||||
|
||||
config = Config()
|
||||
@ -526,29 +526,51 @@ async def stream_chat_completion(
|
||||
|
||||
try:
|
||||
if state.llm_client:
|
||||
# Use OpenRouter with streaming
|
||||
stream = await state.llm_client.chat.completions.create(
|
||||
model=config.UPSTREAM_MODEL,
|
||||
messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
|
||||
temperature=request.temperature or 0.7,
|
||||
max_tokens=request.max_tokens or 4096,
|
||||
stream=True,
|
||||
)
|
||||
# For streaming with tools, we need to handle tool calls first
|
||||
# Then stream the final response
|
||||
if state.tool_manager and config.ENABLE_TOOLS:
|
||||
# Use non-streaming for tool calls, then stream the result
|
||||
response_content = await generate_response(
|
||||
enhanced_messages,
|
||||
temperature=request.temperature or 0.7,
|
||||
max_tokens=request.max_tokens or 4096,
|
||||
)
|
||||
# Stream the final response as a single chunk
|
||||
yield f"data: {json.dumps({
|
||||
'id': request_id,
|
||||
'object': 'chat.completion.chunk',
|
||||
'created': created,
|
||||
'model': config.MODEL_NAME,
|
||||
'choices': [{
|
||||
'index': 0,
|
||||
'delta': {'content': response_content},
|
||||
'finish_reason': None
|
||||
}]
|
||||
})}\n\n"
|
||||
else:
|
||||
# No tools - use regular streaming
|
||||
stream = await state.llm_client.chat.completions.create(
|
||||
model=config.UPSTREAM_MODEL,
|
||||
messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
|
||||
temperature=request.temperature or 0.7,
|
||||
max_tokens=request.max_tokens or 4096,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
async for chunk in stream:
|
||||
if chunk.choices and chunk.choices[0].delta.content:
|
||||
content = chunk.choices[0].delta.content
|
||||
yield f"data: {json.dumps({
|
||||
'id': request_id,
|
||||
'object': 'chat.completion.chunk',
|
||||
'created': created,
|
||||
'model': config.MODEL_NAME,
|
||||
'choices': [{
|
||||
'index': 0,
|
||||
'delta': {'content': content},
|
||||
'finish_reason': None
|
||||
}]
|
||||
})}\n\n"
|
||||
async for chunk in stream:
|
||||
if chunk.choices and chunk.choices[0].delta.content:
|
||||
content = chunk.choices[0].delta.content
|
||||
yield f"data: {json.dumps({
|
||||
'id': request_id,
|
||||
'object': 'chat.completion.chunk',
|
||||
'created': created,
|
||||
'model': config.MODEL_NAME,
|
||||
'choices': [{
|
||||
'index': 0,
|
||||
'delta': {'content': content},
|
||||
'finish_reason': None
|
||||
}]
|
||||
})}\n\n"
|
||||
|
||||
# Send final chunk
|
||||
yield f"data: {json.dumps({
|
||||
@ -615,13 +637,20 @@ def build_enhanced_messages(
|
||||
"""Build enhanced messages with RAG context."""
|
||||
enhanced = []
|
||||
|
||||
# Add system message with RAG context
|
||||
# Add system message with RAG context and tool instructions
|
||||
system_content = (
|
||||
"You are a helpful AI assistant with the ability to access and analyze websites on-demand. "
|
||||
"When a user asks about a website, you can download and analyze its content directly. "
|
||||
"Use the provided context from the knowledge base to give accurate and helpful responses. "
|
||||
"If context from a website is provided, use it to answer the user's question directly with specific information. "
|
||||
"Be helpful, detailed, and provide the specific information the user is asking for (headlines, summaries, etc.)."
|
||||
"You are a helpful AI assistant with access to real-time data through various tools. "
|
||||
"You MUST use these tools to get current information when the user asks about:\n"
|
||||
"- Stocks, crypto, or financial data → use finance_get_stock_info, finance_get_crypto_price, etc.\n"
|
||||
"- Weather → use weather_get_current, weather_get_forecast\n"
|
||||
"- News → use news_search_hackernews, news_get_reddit, news_aggregate\n"
|
||||
"- Medical/health topics → use medical_search_pubmed, medical_search_fda\n"
|
||||
"- Scientific papers → use science_search_arxiv, science_search_semantic_scholar\n"
|
||||
"- General web search → use web_search, web_search_and_fetch\n"
|
||||
"- Wikipedia → use wikipedia_search, wikipedia_get_article\n\n"
|
||||
"IMPORTANT: Always use tools to get CURRENT data. Do not say you cannot access real-time data. "
|
||||
"When asked about stock prices, crypto prices, weather, or news, you MUST call the appropriate tool. "
|
||||
"Be concise and factual. Report the exact data returned by tools."
|
||||
)
|
||||
|
||||
if download_info and download_info.get("downloaded"):
|
||||
@ -650,28 +679,8 @@ async def generate_response(
|
||||
temperature: float = 0.7,
|
||||
max_tokens: int = 4096,
|
||||
) -> str:
|
||||
"""Generate response using upstream LLM via OpenRouter."""
|
||||
if state.llm_client:
|
||||
try:
|
||||
response = await state.llm_client.chat.completions.create(
|
||||
model=config.UPSTREAM_MODEL,
|
||||
messages=[{"role": m.role, "content": m.content} for m in messages if m.content],
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
)
|
||||
|
||||
# Extract content from response
|
||||
if response.choices:
|
||||
message_content = response.choices[0].message.content
|
||||
return message_content or "I apologize, but I couldn't generate a response."
|
||||
|
||||
return "I apologize, but I couldn't generate a response."
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"OpenRouter LLM call failed: {e}")
|
||||
return f"I encountered an error: {str(e)}"
|
||||
|
||||
else:
|
||||
"""Generate response using upstream LLM via OpenRouter with tool calling support."""
|
||||
if not state.llm_client:
|
||||
# Mock response for testing
|
||||
user_msg = ""
|
||||
for msg in reversed(messages):
|
||||
@ -680,6 +689,99 @@ async def generate_response(
|
||||
break
|
||||
return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality."
|
||||
|
||||
try:
|
||||
# Convert messages to dict format
|
||||
messages_dict = []
|
||||
for m in messages:
|
||||
if m.content:
|
||||
messages_dict.append({"role": m.role, "content": m.content})
|
||||
|
||||
# Get available tools
|
||||
tools = None
|
||||
if state.tool_manager and config.ENABLE_TOOLS:
|
||||
tools = state.tool_manager.get_all_schemas()
|
||||
log.info(f"Passing {len(tools)} tools to LLM")
|
||||
|
||||
# Tool calling loop
|
||||
max_iterations = config.MAX_TOOL_ITERATIONS
|
||||
iteration = 0
|
||||
|
||||
while iteration < max_iterations:
|
||||
iteration += 1
|
||||
log.info(f"LLM call iteration {iteration}")
|
||||
|
||||
# Call LLM with tools
|
||||
response = await state.llm_client.chat.completions.create(
|
||||
model=config.UPSTREAM_MODEL,
|
||||
messages=messages_dict,
|
||||
temperature=temperature,
|
||||
max_tokens=max_tokens,
|
||||
tools=tools,
|
||||
tool_choice="auto" if tools else None,
|
||||
)
|
||||
|
||||
if not response.choices:
|
||||
return "I apologize, but I couldn't generate a response."
|
||||
|
||||
message = response.choices[0].message
|
||||
|
||||
# Check if LLM wants to call tools
|
||||
if message.tool_calls:
|
||||
log.info(f"LLM requested {len(message.tool_calls)} tool calls")
|
||||
|
||||
# Add assistant message with tool calls to history
|
||||
messages_dict.append({
|
||||
"role": "assistant",
|
||||
"content": message.content,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": tc.id,
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tc.function.name,
|
||||
"arguments": tc.function.arguments,
|
||||
}
|
||||
}
|
||||
for tc in message.tool_calls
|
||||
]
|
||||
})
|
||||
|
||||
# Execute each tool call
|
||||
for tool_call in message.tool_calls:
|
||||
tool_name = tool_call.function.name
|
||||
tool_args = tool_call.function.arguments
|
||||
|
||||
log.info(f"Executing tool: {tool_name}")
|
||||
|
||||
# Execute the tool
|
||||
if state.tool_manager:
|
||||
result = state.tool_manager.execute_tool_from_json(tool_name, tool_args)
|
||||
else:
|
||||
result = {"success": False, "error": "Tool manager not available"}
|
||||
|
||||
# Add tool result to messages
|
||||
messages_dict.append({
|
||||
"role": "tool",
|
||||
"tool_call_id": tool_call.id,
|
||||
"name": tool_name,
|
||||
"content": json.dumps(result),
|
||||
})
|
||||
|
||||
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
|
||||
|
||||
# Continue loop to get final response
|
||||
continue
|
||||
|
||||
# No tool calls - return the final response
|
||||
return message.content or "I apologize, but I couldn't generate a response."
|
||||
|
||||
# Max iterations reached
|
||||
return "I reached the maximum number of tool calls. Please try a more specific question."
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"OpenRouter LLM call failed: {e}")
|
||||
return f"I encountered an error: {str(e)}"
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Document Management Endpoints
|
||||
|
||||
0
rag/__init__.py
Normal file → Executable file
0
rag/__init__.py
Normal file → Executable file
0
rag/document_processor.py
Normal file → Executable file
0
rag/document_processor.py
Normal file → Executable file
0
rag/retriever.py
Normal file → Executable file
0
rag/retriever.py
Normal file → Executable file
0
rag/vector_store.py
Normal file → Executable file
0
rag/vector_store.py
Normal file → Executable file
0
requirements.txt
Normal file → Executable file
0
requirements.txt
Normal file → Executable file
0
tools/__init__.py
Normal file → Executable file
0
tools/__init__.py
Normal file → Executable file
0
tools/finance_tool.py
Normal file → Executable file
0
tools/finance_tool.py
Normal file → Executable file
0
tools/medical_tool.py
Normal file → Executable file
0
tools/medical_tool.py
Normal file → Executable file
0
tools/news_tool.py
Normal file → Executable file
0
tools/news_tool.py
Normal file → Executable file
0
tools/science_tool.py
Normal file → Executable file
0
tools/science_tool.py
Normal file → Executable file
0
tools/weather_tool.py
Normal file → Executable file
0
tools/weather_tool.py
Normal file → Executable file
0
tools/web_tool.py
Normal file → Executable file
0
tools/web_tool.py
Normal file → Executable file
0
tools/wikipedia_tool.py
Normal file → Executable file
0
tools/wikipedia_tool.py
Normal file → Executable file
0
website_downloader.py
Normal file → Executable file
0
website_downloader.py
Normal file → Executable file
0
website_downloader_tool.py
Normal file → Executable file
0
website_downloader_tool.py
Normal file → Executable file
Loading…
Reference in New Issue
Block a user