From b811162f784446dcd5ac9a7fb2ce115ea759ad2f Mon Sep 17 00:00:00 2001 From: Z User Date: Sun, 29 Mar 2026 16:07:56 +0000 Subject: [PATCH] Implement tool calling loop for LLM - Pass all registered tools to LLM during chat completion - Handle tool_calls from LLM response - Execute tools and feed results back to LLM - Loop until LLM returns final response - Updated system prompt to encourage tool use - Updated streaming to handle tool calls - Increased MAX_TOOL_ITERATIONS to 5 --- .gitignore | 0 README.md | 0 main.py | 204 +++++++++++++++++++++++++++---------- rag/__init__.py | 0 rag/document_processor.py | 0 rag/retriever.py | 0 rag/vector_store.py | 0 requirements.txt | 0 tools.md | 0 tools/__init__.py | 0 tools/finance_tool.py | 0 tools/medical_tool.py | 0 tools/news_tool.py | 0 tools/science_tool.py | 0 tools/weather_tool.py | 0 tools/web_tool.py | 0 tools/wikipedia_tool.py | 0 website_downloader.py | 0 website_downloader_tool.py | 0 19 files changed, 153 insertions(+), 51 deletions(-) mode change 100644 => 100755 .gitignore mode change 100644 => 100755 README.md mode change 100644 => 100755 main.py mode change 100644 => 100755 rag/__init__.py mode change 100644 => 100755 rag/document_processor.py mode change 100644 => 100755 rag/retriever.py mode change 100644 => 100755 rag/vector_store.py mode change 100644 => 100755 requirements.txt mode change 100644 => 100755 tools.md mode change 100644 => 100755 tools/__init__.py mode change 100644 => 100755 tools/finance_tool.py mode change 100644 => 100755 tools/medical_tool.py mode change 100644 => 100755 tools/news_tool.py mode change 100644 => 100755 tools/science_tool.py mode change 100644 => 100755 tools/weather_tool.py mode change 100644 => 100755 tools/web_tool.py mode change 100644 => 100755 tools/wikipedia_tool.py mode change 100644 => 100755 website_downloader.py mode change 100644 => 100755 website_downloader_tool.py diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/main.py b/main.py old mode 100644 new mode 100755 index a2cf917..6719197 --- a/main.py +++ b/main.py @@ -91,7 +91,7 @@ class Config: # Tool settings ENABLE_TOOLS: bool = os.getenv("ENABLE_TOOLS", "true").lower() == "true" - MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "3")) + MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "5")) config = Config() @@ -526,29 +526,51 @@ async def stream_chat_completion( try: if state.llm_client: - # Use OpenRouter with streaming - stream = await state.llm_client.chat.completions.create( - model=config.UPSTREAM_MODEL, - messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content], - temperature=request.temperature or 0.7, - max_tokens=request.max_tokens or 4096, - stream=True, - ) + # For streaming with tools, we need to handle tool calls first + # Then stream the final response + if state.tool_manager and config.ENABLE_TOOLS: + # Use non-streaming for tool calls, then stream the result + response_content = await generate_response( + enhanced_messages, + temperature=request.temperature or 0.7, + max_tokens=request.max_tokens or 4096, + ) + # Stream the final response as a single chunk + yield f"data: {json.dumps({ + 'id': request_id, + 'object': 'chat.completion.chunk', + 'created': created, + 'model': config.MODEL_NAME, + 'choices': [{ + 'index': 0, + 'delta': {'content': response_content}, + 'finish_reason': None + }] + })}\n\n" + else: + # No tools - use regular streaming + stream = await state.llm_client.chat.completions.create( + model=config.UPSTREAM_MODEL, + messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content], + temperature=request.temperature or 0.7, + max_tokens=request.max_tokens or 4096, + stream=True, + ) - async for chunk in stream: - if chunk.choices and chunk.choices[0].delta.content: - content = chunk.choices[0].delta.content - yield f"data: {json.dumps({ - 'id': request_id, - 'object': 'chat.completion.chunk', - 'created': created, - 'model': config.MODEL_NAME, - 'choices': [{ - 'index': 0, - 'delta': {'content': content}, - 'finish_reason': None - }] - })}\n\n" + async for chunk in stream: + if chunk.choices and chunk.choices[0].delta.content: + content = chunk.choices[0].delta.content + yield f"data: {json.dumps({ + 'id': request_id, + 'object': 'chat.completion.chunk', + 'created': created, + 'model': config.MODEL_NAME, + 'choices': [{ + 'index': 0, + 'delta': {'content': content}, + 'finish_reason': None + }] + })}\n\n" # Send final chunk yield f"data: {json.dumps({ @@ -615,13 +637,20 @@ def build_enhanced_messages( """Build enhanced messages with RAG context.""" enhanced = [] - # Add system message with RAG context + # Add system message with RAG context and tool instructions system_content = ( - "You are a helpful AI assistant with the ability to access and analyze websites on-demand. " - "When a user asks about a website, you can download and analyze its content directly. " - "Use the provided context from the knowledge base to give accurate and helpful responses. " - "If context from a website is provided, use it to answer the user's question directly with specific information. " - "Be helpful, detailed, and provide the specific information the user is asking for (headlines, summaries, etc.)." + "You are a helpful AI assistant with access to real-time data through various tools. " + "You MUST use these tools to get current information when the user asks about:\n" + "- Stocks, crypto, or financial data → use finance_get_stock_info, finance_get_crypto_price, etc.\n" + "- Weather → use weather_get_current, weather_get_forecast\n" + "- News → use news_search_hackernews, news_get_reddit, news_aggregate\n" + "- Medical/health topics → use medical_search_pubmed, medical_search_fda\n" + "- Scientific papers → use science_search_arxiv, science_search_semantic_scholar\n" + "- General web search → use web_search, web_search_and_fetch\n" + "- Wikipedia → use wikipedia_search, wikipedia_get_article\n\n" + "IMPORTANT: Always use tools to get CURRENT data. Do not say you cannot access real-time data. " + "When asked about stock prices, crypto prices, weather, or news, you MUST call the appropriate tool. " + "Be concise and factual. Report the exact data returned by tools." ) if download_info and download_info.get("downloaded"): @@ -650,28 +679,8 @@ async def generate_response( temperature: float = 0.7, max_tokens: int = 4096, ) -> str: - """Generate response using upstream LLM via OpenRouter.""" - if state.llm_client: - try: - response = await state.llm_client.chat.completions.create( - model=config.UPSTREAM_MODEL, - messages=[{"role": m.role, "content": m.content} for m in messages if m.content], - temperature=temperature, - max_tokens=max_tokens, - ) - - # Extract content from response - if response.choices: - message_content = response.choices[0].message.content - return message_content or "I apologize, but I couldn't generate a response." - - return "I apologize, but I couldn't generate a response." - - except Exception as e: - log.error(f"OpenRouter LLM call failed: {e}") - return f"I encountered an error: {str(e)}" - - else: + """Generate response using upstream LLM via OpenRouter with tool calling support.""" + if not state.llm_client: # Mock response for testing user_msg = "" for msg in reversed(messages): @@ -680,6 +689,99 @@ async def generate_response( break return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality." + try: + # Convert messages to dict format + messages_dict = [] + for m in messages: + if m.content: + messages_dict.append({"role": m.role, "content": m.content}) + + # Get available tools + tools = None + if state.tool_manager and config.ENABLE_TOOLS: + tools = state.tool_manager.get_all_schemas() + log.info(f"Passing {len(tools)} tools to LLM") + + # Tool calling loop + max_iterations = config.MAX_TOOL_ITERATIONS + iteration = 0 + + while iteration < max_iterations: + iteration += 1 + log.info(f"LLM call iteration {iteration}") + + # Call LLM with tools + response = await state.llm_client.chat.completions.create( + model=config.UPSTREAM_MODEL, + messages=messages_dict, + temperature=temperature, + max_tokens=max_tokens, + tools=tools, + tool_choice="auto" if tools else None, + ) + + if not response.choices: + return "I apologize, but I couldn't generate a response." + + message = response.choices[0].message + + # Check if LLM wants to call tools + if message.tool_calls: + log.info(f"LLM requested {len(message.tool_calls)} tool calls") + + # Add assistant message with tool calls to history + messages_dict.append({ + "role": "assistant", + "content": message.content, + "tool_calls": [ + { + "id": tc.id, + "type": "function", + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, + } + } + for tc in message.tool_calls + ] + }) + + # Execute each tool call + for tool_call in message.tool_calls: + tool_name = tool_call.function.name + tool_args = tool_call.function.arguments + + log.info(f"Executing tool: {tool_name}") + + # Execute the tool + if state.tool_manager: + result = state.tool_manager.execute_tool_from_json(tool_name, tool_args) + else: + result = {"success": False, "error": "Tool manager not available"} + + # Add tool result to messages + messages_dict.append({ + "role": "tool", + "tool_call_id": tool_call.id, + "name": tool_name, + "content": json.dumps(result), + }) + + log.info(f"Tool {tool_name} result: success={result.get('success', False)}") + + # Continue loop to get final response + continue + + # No tool calls - return the final response + return message.content or "I apologize, but I couldn't generate a response." + + # Max iterations reached + return "I reached the maximum number of tool calls. Please try a more specific question." + + except Exception as e: + log.error(f"OpenRouter LLM call failed: {e}") + return f"I encountered an error: {str(e)}" + # ============================================================================= # Document Management Endpoints diff --git a/rag/__init__.py b/rag/__init__.py old mode 100644 new mode 100755 diff --git a/rag/document_processor.py b/rag/document_processor.py old mode 100644 new mode 100755 diff --git a/rag/retriever.py b/rag/retriever.py old mode 100644 new mode 100755 diff --git a/rag/vector_store.py b/rag/vector_store.py old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/tools.md b/tools.md old mode 100644 new mode 100755 diff --git a/tools/__init__.py b/tools/__init__.py old mode 100644 new mode 100755 diff --git a/tools/finance_tool.py b/tools/finance_tool.py old mode 100644 new mode 100755 diff --git a/tools/medical_tool.py b/tools/medical_tool.py old mode 100644 new mode 100755 diff --git a/tools/news_tool.py b/tools/news_tool.py old mode 100644 new mode 100755 diff --git a/tools/science_tool.py b/tools/science_tool.py old mode 100644 new mode 100755 diff --git a/tools/weather_tool.py b/tools/weather_tool.py old mode 100644 new mode 100755 diff --git a/tools/web_tool.py b/tools/web_tool.py old mode 100644 new mode 100755 diff --git a/tools/wikipedia_tool.py b/tools/wikipedia_tool.py old mode 100644 new mode 100755 diff --git a/website_downloader.py b/website_downloader.py old mode 100644 new mode 100755 diff --git a/website_downloader_tool.py b/website_downloader_tool.py old mode 100644 new mode 100755