From b811162f784446dcd5ac9a7fb2ce115ea759ad2f Mon Sep 17 00:00:00 2001
From: Z User <z@container>
Date: Sun, 29 Mar 2026 16:07:56 +0000
Subject: [PATCH] Implement tool calling loop for LLM

- Pass all registered tools to LLM during chat completion
- Handle tool_calls from LLM response
- Execute tools and feed results back to LLM
- Loop until LLM returns final response
- Updated system prompt to encourage tool use
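- Updated streaming: when tools are enabled, run the non-streaming tool loop
  first and emit the final response as a single chunk (no token-by-token
  streaming); without tools, streaming is unchanged
- Increased the default MAX_TOOL_ITERATIONS from 3 to 5
- Also changes the file mode of all 19 listed files from 100644 to 100755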
---
 .gitignore                 |   0
 README.md                  |   0
 main.py                    | 204 +++++++++++++++++++++++++++----------
 rag/__init__.py            |   0
 rag/document_processor.py  |   0
 rag/retriever.py           |   0
 rag/vector_store.py        |   0
 requirements.txt           |   0
 tools.md                   |   0
 tools/__init__.py          |   0
 tools/finance_tool.py      |   0
 tools/medical_tool.py      |   0
 tools/news_tool.py         |   0
 tools/science_tool.py      |   0
 tools/weather_tool.py      |   0
 tools/web_tool.py          |   0
 tools/wikipedia_tool.py    |   0
 website_downloader.py      |   0
 website_downloader_tool.py |   0
 19 files changed, 153 insertions(+), 51 deletions(-)
 mode change 100644 => 100755 .gitignore
 mode change 100644 => 100755 README.md
 mode change 100644 => 100755 main.py
 mode change 100644 => 100755 rag/__init__.py
 mode change 100644 => 100755 rag/document_processor.py
 mode change 100644 => 100755 rag/retriever.py
 mode change 100644 => 100755 rag/vector_store.py
 mode change 100644 => 100755 requirements.txt
 mode change 100644 => 100755 tools.md
 mode change 100644 => 100755 tools/__init__.py
 mode change 100644 => 100755 tools/finance_tool.py
 mode change 100644 => 100755 tools/medical_tool.py
 mode change 100644 => 100755 tools/news_tool.py
 mode change 100644 => 100755 tools/science_tool.py
 mode change 100644 => 100755 tools/weather_tool.py
 mode change 100644 => 100755 tools/web_tool.py
 mode change 100644 => 100755 tools/wikipedia_tool.py
 mode change 100644 => 100755 website_downloader.py
 mode change 100644 => 100755 website_downloader_tool.py

diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/main.py b/main.py
old mode 100644
new mode 100755
index a2cf917..6719197
--- a/main.py
+++ b/main.py
@@ -91,7 +91,7 @@ class Config:
 
     # Tool settings
     ENABLE_TOOLS: bool = os.getenv("ENABLE_TOOLS", "true").lower() == "true"
-    MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "3"))
+    MAX_TOOL_ITERATIONS: int = int(os.getenv("MAX_TOOL_ITERATIONS", "5"))
 
 
 config = Config()
@@ -526,29 +526,51 @@ async def stream_chat_completion(
 
     try:
         if state.llm_client:
-            # Use OpenRouter with streaming
-            stream = await state.llm_client.chat.completions.create(
-                model=config.UPSTREAM_MODEL,
-                messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
-                temperature=request.temperature or 0.7,
-                max_tokens=request.max_tokens or 4096,
-                stream=True,
-            )
+            # When tools are enabled, the tool-calling loop has to finish before any
+            # output is sent, so the final response is not streamed token by token.
+            if state.tool_manager and config.ENABLE_TOOLS:
+                # Run the non-streaming tool loop, then emit its final text below
+                response_content = await generate_response(
+                    enhanced_messages,
+                    temperature=request.temperature or 0.7,
+                    max_tokens=request.max_tokens or 4096,
+                )
+                # Stream the final response as a single chunk
+                yield f"data: {json.dumps({
+                    'id': request_id,
+                    'object': 'chat.completion.chunk',
+                    'created': created,
+                    'model': config.MODEL_NAME,
+                    'choices': [{
+                        'index': 0,
+                        'delta': {'content': response_content},
+                        'finish_reason': None
+                    }]
+                })}\n\n"
+            else:
+                # No tools - use regular streaming
+                stream = await state.llm_client.chat.completions.create(
+                    model=config.UPSTREAM_MODEL,
+                    messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
+                    temperature=request.temperature or 0.7,
+                    max_tokens=request.max_tokens or 4096,
+                    stream=True,
+                )
 
-            async for chunk in stream:
-                if chunk.choices and chunk.choices[0].delta.content:
-                    content = chunk.choices[0].delta.content
-                    yield f"data: {json.dumps({
-                        'id': request_id,
-                        'object': 'chat.completion.chunk',
-                        'created': created,
-                        'model': config.MODEL_NAME,
-                        'choices': [{
-                            'index': 0,
-                            'delta': {'content': content},
-                            'finish_reason': None
-                        }]
-                    })}\n\n"
+                async for chunk in stream:
+                    if chunk.choices and chunk.choices[0].delta.content:
+                        content = chunk.choices[0].delta.content
+                        yield f"data: {json.dumps({
+                            'id': request_id,
+                            'object': 'chat.completion.chunk',
+                            'created': created,
+                            'model': config.MODEL_NAME,
+                            'choices': [{
+                                'index': 0,
+                                'delta': {'content': content},
+                                'finish_reason': None
+                            }]
+                        })}\n\n"
 
             # Send final chunk
             yield f"data: {json.dumps({
@@ -615,13 +637,20 @@ def build_enhanced_messages(
     """Build enhanced messages with RAG context."""
     enhanced = []
 
-    # Add system message with RAG context
+    # Add system message with RAG context and tool instructions
     system_content = (
-        "You are a helpful AI assistant with the ability to access and analyze websites on-demand. "
-        "When a user asks about a website, you can download and analyze its content directly. "
-        "Use the provided context from the knowledge base to give accurate and helpful responses. "
-        "If context from a website is provided, use it to answer the user's question directly with specific information. "
-        "Be helpful, detailed, and provide the specific information the user is asking for (headlines, summaries, etc.)."
+        "You are a helpful AI assistant with access to real-time data through various tools. "
+        "You MUST use these tools to get current information when the user asks about:\n"
+        "- Stocks, crypto, or financial data → use finance_get_stock_info, finance_get_crypto_price, etc.\n"
+        "- Weather → use weather_get_current, weather_get_forecast\n"
+        "- News → use news_search_hackernews, news_get_reddit, news_aggregate\n"
+        "- Medical/health topics → use medical_search_pubmed, medical_search_fda\n"
+        "- Scientific papers → use science_search_arxiv, science_search_semantic_scholar\n"
+        "- General web search → use web_search, web_search_and_fetch\n"
+        "- Wikipedia → use wikipedia_search, wikipedia_get_article\n\n"
+        "IMPORTANT: Always use tools to get CURRENT data. Do not say you cannot access real-time data. "
+        "When asked about stock prices, crypto prices, weather, or news, you MUST call the appropriate tool. "
+        "Be concise and factual. Report the exact data returned by tools."
     )
 
     if download_info and download_info.get("downloaded"):
@@ -650,28 +679,8 @@ async def generate_response(
     temperature: float = 0.7,
     max_tokens: int = 4096,
 ) -> str:
-    """Generate response using upstream LLM via OpenRouter."""
-    if state.llm_client:
-        try:
-            response = await state.llm_client.chat.completions.create(
-                model=config.UPSTREAM_MODEL,
-                messages=[{"role": m.role, "content": m.content} for m in messages if m.content],
-                temperature=temperature,
-                max_tokens=max_tokens,
-            )
-
-            # Extract content from response
-            if response.choices:
-                message_content = response.choices[0].message.content
-                return message_content or "I apologize, but I couldn't generate a response."
-            
-            return "I apologize, but I couldn't generate a response."
-
-        except Exception as e:
-            log.error(f"OpenRouter LLM call failed: {e}")
-            return f"I encountered an error: {str(e)}"
-
-    else:
+    """Generate response using upstream LLM via OpenRouter with tool calling support."""
+    if not state.llm_client:
         # Mock response for testing
         user_msg = ""
         for msg in reversed(messages):
@@ -680,6 +689,99 @@ async def generate_response(
                 break
         return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality."
 
+    try:
+        # Convert messages to dict format
+        messages_dict = []
+        for m in messages:
+            if m.content:
+                messages_dict.append({"role": m.role, "content": m.content})
+
+        # Get available tools
+        tools = None
+        if state.tool_manager and config.ENABLE_TOOLS:
+            tools = state.tool_manager.get_all_schemas()
+            log.info(f"Passing {len(tools)} tools to LLM")
+
+        # Tool calling loop
+        max_iterations = config.MAX_TOOL_ITERATIONS
+        iteration = 0
+
+        while iteration < max_iterations:
+            iteration += 1
+            log.info(f"LLM call iteration {iteration}")
+
+            # Call LLM with tools
+            response = await state.llm_client.chat.completions.create(
+                model=config.UPSTREAM_MODEL,
+                messages=messages_dict,
+                temperature=temperature,
+                max_tokens=max_tokens,
+                tools=tools,
+                tool_choice="auto" if tools else None,
+            )
+
+            if not response.choices:
+                return "I apologize, but I couldn't generate a response."
+
+            message = response.choices[0].message
+
+            # Check if LLM wants to call tools
+            if message.tool_calls:
+                log.info(f"LLM requested {len(message.tool_calls)} tool calls")
+
+                # Add assistant message with tool calls to history
+                messages_dict.append({
+                    "role": "assistant",
+                    "content": message.content,
+                    "tool_calls": [
+                        {
+                            "id": tc.id,
+                            "type": "function",
+                            "function": {
+                                "name": tc.function.name,
+                                "arguments": tc.function.arguments,
+                            }
+                        }
+                        for tc in message.tool_calls
+                    ]
+                })
+
+                # Execute each tool call
+                for tool_call in message.tool_calls:
+                    tool_name = tool_call.function.name
+                    tool_args = tool_call.function.arguments
+
+                    log.info(f"Executing tool: {tool_name}")
+
+                    # Execute the tool
+                    if state.tool_manager:
+                        result = state.tool_manager.execute_tool_from_json(tool_name, tool_args)
+                    else:
+                        result = {"success": False, "error": "Tool manager not available"}
+
+                    # Add tool result to messages
+                    messages_dict.append({
+                        "role": "tool",
+                        "tool_call_id": tool_call.id,
+                        "name": tool_name,
+                        "content": json.dumps(result),
+                    })
+
+                    log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
+
+                # Loop again with the tool results appended; the LLM may answer or request more tools
+                continue
+
+            # No tool calls - return the final response
+            return message.content or "I apologize, but I couldn't generate a response."
+
+        # Max iterations reached
+        return "I reached the maximum number of tool calls. Please try a more specific question."
+
+    except Exception as e:
+        log.error(f"OpenRouter LLM call failed: {e}")
+        return f"I encountered an error: {str(e)}"
+
 
 # =============================================================================
 # Document Management Endpoints
diff --git a/rag/__init__.py b/rag/__init__.py
old mode 100644
new mode 100755
diff --git a/rag/document_processor.py b/rag/document_processor.py
old mode 100644
new mode 100755
diff --git a/rag/retriever.py b/rag/retriever.py
old mode 100644
new mode 100755
diff --git a/rag/vector_store.py b/rag/vector_store.py
old mode 100644
new mode 100755
diff --git a/requirements.txt b/requirements.txt
old mode 100644
new mode 100755
diff --git a/tools.md b/tools.md
old mode 100644
new mode 100755
diff --git a/tools/__init__.py b/tools/__init__.py
old mode 100644
new mode 100755
diff --git a/tools/finance_tool.py b/tools/finance_tool.py
old mode 100644
new mode 100755
diff --git a/tools/medical_tool.py b/tools/medical_tool.py
old mode 100644
new mode 100755
diff --git a/tools/news_tool.py b/tools/news_tool.py
old mode 100644
new mode 100755
diff --git a/tools/science_tool.py b/tools/science_tool.py
old mode 100644
new mode 100755
diff --git a/tools/weather_tool.py b/tools/weather_tool.py
old mode 100644
new mode 100755
diff --git a/tools/web_tool.py b/tools/web_tool.py
old mode 100644
new mode 100755
diff --git a/tools/wikipedia_tool.py b/tools/wikipedia_tool.py
old mode 100644
new mode 100755
diff --git a/website_downloader.py b/website_downloader.py
old mode 100644
new mode 100755
diff --git a/website_downloader_tool.py b/website_downloader_tool.py
old mode 100644
new mode 100755