Implement tool calling loop for LLM

- Pass all registered tools to the LLM during chat completion
- Handle tool_calls from the LLM response
- Execute tools and feed the results back to the LLM
- Loop until the LLM returns a final response
- Update the system prompt to encourage tool use
- Update streaming to handle tool calls
- Increase the default MAX_TOOL_ITERATIONS from 3 to 5
2026-03-29 16:07:56 +00:00 · 2026-03-29 16:07:56 +00:00 · b811162f78
commit b811162f78
parent 4394e7d6f9
19 changed files with 153 additions and 51 deletions
--- a/.gitignore
+++ b/.gitignore
--- a/README.md
+++ b/README.md
--- a/main.py
+++ b/main.py
@@ -91,7 +91,7 @@ class Config:
    # Tool settings
    ENABLE_TOOLS: bool = os.getenv("ENABLE_TOOLS", "true").lower() == "true"
-    MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "3"))
+    MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "5"))
 config = Config()
@@ -526,29 +526,51 @@ async def stream_chat_completion(
    try:
        if state.llm_client:
-            # Use OpenRouter with streaming
+            # For streaming with tools, we need to handle tool calls first
-            stream = await state.llm_client.chat.completions.create(
+            # Then stream the final response
-                model=config.UPSTREAM_MODEL,
+            if state.tool_manager and config.ENABLE_TOOLS:
-                messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
+                # Use non-streaming for tool calls, then stream the result
-                temperature=request.temperature or 0.7,
+                response_content = await generate_response(
-                max_tokens=request.max_tokens or 4096,
+                    enhanced_messages,
-                stream=True,
+                    temperature=request.temperature or 0.7,
-            )
+                    max_tokens=request.max_tokens or 4096,
                )
                # Stream the final response as a single chunk
                yield f"data: {json.dumps({
                    'id': request_id,
                    'object': 'chat.completion.chunk',
                    'created': created,
                    'model': config.MODEL_NAME,
                    'choices': [{
                        'index': 0,
                        'delta': {'content': response_content},
                        'finish_reason': None
                    }]
                })}\n\n"
            else:
                # No tools - use regular streaming
                stream = await state.llm_client.chat.completions.create(
                    model=config.UPSTREAM_MODEL,
                    messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
                    temperature=request.temperature or 0.7,
                    max_tokens=request.max_tokens or 4096,
                    stream=True,
                )
-            async for chunk in stream:
+                async for chunk in stream:
-                if chunk.choices and chunk.choices[0].delta.content:
+                    if chunk.choices and chunk.choices[0].delta.content:
-                    content = chunk.choices[0].delta.content
+                        content = chunk.choices[0].delta.content
-                    yield f"data: {json.dumps({
+                        yield f"data: {json.dumps({
-                        'id': request_id,
+                            'id': request_id,
-                        'object': 'chat.completion.chunk',
+                            'object': 'chat.completion.chunk',
-                        'created': created,
+                            'created': created,
-                        'model': config.MODEL_NAME,
+                            'model': config.MODEL_NAME,
-                        'choices': [{
+                            'choices': [{
-                            'index': 0,
+                                'index': 0,
-                            'delta': {'content': content},
+                                'delta': {'content': content},
-                            'finish_reason': None
+                                'finish_reason': None
-                        }]
+                            }]
-                    })}\n\n"
+                        })}\n\n"
            # Send final chunk
            yield f"data: {json.dumps({
@@ -615,13 +637,20 @@ def build_enhanced_messages(
    """Build enhanced messages with RAG context."""
    enhanced = []
-    # Add system message with RAG context
+    # Add system message with RAG context and tool instructions
    system_content = (
-        "You are a helpful AI assistant with the ability to access and analyze websites on-demand. "
+        "You are a helpful AI assistant with access to real-time data through various tools. "
-        "When a user asks about a website, you can download and analyze its content directly. "
+        "You MUST use these tools to get current information when the user asks about:\n"
-        "Use the provided context from the knowledge base to give accurate and helpful responses. "
+        "- Stocks, crypto, or financial data → use finance_get_stock_info, finance_get_crypto_price, etc.\n"
-        "If context from a website is provided, use it to answer the user's question directly with specific information. "
+        "- Weather → use weather_get_current, weather_get_forecast\n"
-        "Be helpful, detailed, and provide the specific information the user is asking for (headlines, summaries, etc.)."
+        "- News → use news_search_hackernews, news_get_reddit, news_aggregate\n"
        "- Medical/health topics → use medical_search_pubmed, medical_search_fda\n"
        "- Scientific papers → use science_search_arxiv, science_search_semantic_scholar\n"
        "- General web search → use web_search, web_search_and_fetch\n"
        "- Wikipedia → use wikipedia_search, wikipedia_get_article\n\n"
        "IMPORTANT: Always use tools to get CURRENT data. Do not say you cannot access real-time data. "
        "When asked about stock prices, crypto prices, weather, or news, you MUST call the appropriate tool. "
        "Be concise and factual. Report the exact data returned by tools."
    )
    if download_info and download_info.get("downloaded"):
@@ -650,28 +679,8 @@ async def generate_response(
    temperature: float = 0.7,
    max_tokens: int = 4096,
 ) -> str:
-    """Generate response using upstream LLM via OpenRouter."""
+    """Generate response using upstream LLM via OpenRouter with tool calling support."""
-    if state.llm_client:
+    if not state.llm_client:
        try:
            response = await state.llm_client.chat.completions.create(
                model=config.UPSTREAM_MODEL,
                messages=[{"role": m.role, "content": m.content} for m in messages if m.content],
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # Extract content from response
            if response.choices:
                message_content = response.choices[0].message.content
                return message_content or "I apologize, but I couldn't generate a response."
            return "I apologize, but I couldn't generate a response."
        except Exception as e:
            log.error(f"OpenRouter LLM call failed: {e}")
            return f"I encountered an error: {str(e)}"
    else:
        # Mock response for testing
        user_msg = ""
        for msg in reversed(messages):
@@ -680,6 +689,99 @@ async def generate_response(
                break
        return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality."
    try:
        # Convert messages to dict format
        messages_dict = []
        for m in messages:
            if m.content:
                messages_dict.append({"role": m.role, "content": m.content})
        # Get available tools
        tools = None
        if state.tool_manager and config.ENABLE_TOOLS:
            tools = state.tool_manager.get_all_schemas()
            log.info(f"Passing {len(tools)} tools to LLM")
        # Tool calling loop
        max_iterations = config.MAX_TOOL_ITERATIONS
        iteration = 0
        while iteration < max_iterations:
            iteration += 1
            log.info(f"LLM call iteration {iteration}")
            # Call LLM with tools
            response = await state.llm_client.chat.completions.create(
                model=config.UPSTREAM_MODEL,
                messages=messages_dict,
                temperature=temperature,
                max_tokens=max_tokens,
                tools=tools,
                tool_choice="auto" if tools else None,
            )
            if not response.choices:
                return "I apologize, but I couldn't generate a response."
            message = response.choices[0].message
            # Check if LLM wants to call tools
            if message.tool_calls:
                log.info(f"LLM requested {len(message.tool_calls)} tool calls")
                # Add assistant message with tool calls to history
                messages_dict.append({
                    "role": "assistant",
                    "content": message.content,
                    "tool_calls": [
                        {
                            "id": tc.id,
                            "type": "function",
                            "function": {
                                "name": tc.function.name,
                                "arguments": tc.function.arguments,
                            }
                        }
                        for tc in message.tool_calls
                    ]
                })
                # Execute each tool call
                for tool_call in message.tool_calls:
                    tool_name = tool_call.function.name
                    tool_args = tool_call.function.arguments
                    log.info(f"Executing tool: {tool_name}")
                    # Execute the tool
                    if state.tool_manager:
                        result = state.tool_manager.execute_tool_from_json(tool_name, tool_args)
                    else:
                        result = {"success": False, "error": "Tool manager not available"}
                    # Add tool result to messages
                    messages_dict.append({
                        "role": "tool",
                        "tool_call_id": tool_call.id,
                        "name": tool_name,
                        "content": json.dumps(result),
                    })
                    log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
                # Continue loop to get final response
                continue
            # No tool calls - return the final response
            return message.content or "I apologize, but I couldn't generate a response."
        # Max iterations reached
        return "I reached the maximum number of tool calls. Please try a more specific question."
    except Exception as e:
        log.error(f"OpenRouter LLM call failed: {e}")
        return f"I encountered an error: {str(e)}"
 # =============================================================================
 # Document Management Endpoints
--- a/rag/init.py
+++ b/rag/init.py
--- a/rag/document_processor.py
+++ b/rag/document_processor.py
--- a/rag/retriever.py
+++ b/rag/retriever.py
--- a/rag/vector_store.py
+++ b/rag/vector_store.py
--- a/requirements.txt
+++ b/requirements.txt
--- a/tools.md
+++ b/tools.md
--- a/tools/init.py
+++ b/tools/init.py
--- a/tools/finance_tool.py
+++ b/tools/finance_tool.py
--- a/tools/medical_tool.py
+++ b/tools/medical_tool.py
--- a/tools/news_tool.py
+++ b/tools/news_tool.py
--- a/tools/science_tool.py
+++ b/tools/science_tool.py
--- a/tools/weather_tool.py
+++ b/tools/weather_tool.py
--- a/tools/web_tool.py
+++ b/tools/web_tool.py
--- a/tools/wikipedia_tool.py
+++ b/tools/wikipedia_tool.py
--- a/website_downloader.py
+++ b/website_downloader.py
--- a/website_downloader_tool.py
+++ b/website_downloader_tool.py