Fix tool calling: switch to native OpenAI tools parameter

Problems fixed:
- 'Mega tool call': the LLM was outputting multiple tool calls that got bundled into one. Now uses the native OpenAI tools parameter, which handles multiple tool calls properly via the message.tool_calls array.
- 'Returning nothing': _clean_tool_syntax was too aggressive, stripping the entire response. Now only strips code-fence-wrapped blocks.
- Tool results were appended to the system message, growing it unboundedly; now uses proper 'tool' role messages in the conversation history.

Key changes:
- generate_response: passes tools/tool_choice to the OpenAI API (native tool calling), with a retry without tool_choice for unsupported models
- generate_response: handles multiple tool_calls per response natively
- generate_response: uses the proper 'tool' role for native tool-call results instead of appending them to the system message
- _parse_tool_calls (was _parse_tool_call): now returns a list, supports multiple tool calls, and is used as a fallback for models without native tools
- _clean_tool_syntax: much less aggressive; only strips code-fence blocks containing tool_call and no longer removes bare JSON (which was eating valid responses)
- System prompt: removed the JSON format instructions (native tools handle the format) and simplified the rules
2026-03-29 17:57:26 +00:00 · 2026-03-29 17:57:26 +00:00 · 57228625fc
commit 57228625fc
parent c03bde8023
1 changed file with 237 additions and 178 deletions
--- a/main.py
+++ b/main.py
@ -670,28 +670,16 @@ def build_enhanced_messages(
    tool_descriptions = _build_tool_descriptions()
    
    # Add system message with RAG context and tool instructions
-    system_content = f"""You are a helpful AI assistant with access to real-time data through various tools.
+    system_content = """You are a helpful AI assistant with access to real-time data through various tools.

 ## AVAILABLE TOOLS
-{tool_descriptions}
-
-## HOW TO USE TOOLS
-When you need to use a tool, output a JSON block in this EXACT format:
-```json
-{{"tool_call": {{"name": "tool_name", "arguments": {{"arg1": "value1"}}}}}}
-```
-
-For example, to get stock info for AAPL:
-```json
-{{"tool_call": {{"name": "finance_get_stock_info", "arguments": {{"symbol": "AAPL"}}}}}}
-```
+You have access to tools for getting real-time data. Use them whenever you need current information.

 ## IMPORTANT RULES
-1. ALWAYS use tools to get CURRENT data - do NOT say you cannot access real-time data
+1. ALWAYS use your available tools to get CURRENT data - do NOT say you cannot access real-time data
 2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool
-3. Output ONLY the JSON tool_call block when you need to use a tool
-4. After receiving tool results, provide a helpful response based on the data
-5. Be concise and factual - report exact data from tools
+3. After receiving tool results, provide a helpful, natural-language response based on the data
+4. Be concise and factual - report exact data from tools
 """

    if download_info and download_info.get("downloaded"):
@ -745,75 +733,98 @@ def _build_tool_descriptions() -> str:
    return "\n".join(descriptions)


-def _parse_tool_call(content: str) -> Optional[dict]:
-    """Parse a tool call from LLM response content."""
-    import re
+def _parse_tool_calls(content: str) -> list[dict]:
+    """Parse tool calls from LLM response content (fallback for models without native tool support).

-    def _extract_json_object(text: str, start_key: str) -> Optional[dict]:
-        """Extract a JSON object containing start_key using brace counting."""
-        # Find the start of the outermost object containing start_key
-        idx = text.find(start_key)
-        if idx == -1:
-            return None
-        # Walk backwards to find the opening { of this object
-        depth = 0
-        obj_start = -1
-        for i in range(idx, -1, -1):
-            if text[i] == '}':
-                depth += 1
-            elif text[i] == '{':
-                if depth == 0:
-                    obj_start = i
-                    break
-                depth -= 1
-        if obj_start == -1:
-            return None
-        # Walk forwards to find the matching closing }
-        depth = 0
-        obj_end = -1
-        for i in range(obj_start, len(text)):
-            if text[i] == '{':
-                depth += 1
-            elif text[i] == '}':
-                depth -= 1
-                if depth == 0:
-                    obj_end = i + 1
-                    break
-        if obj_end == -1:
-            return None
-        try:
-            return json.loads(text[obj_start:obj_end])
-        except json.JSONDecodeError:
-            return None
+    Returns a list of tool call dicts, each with 'name' and 'arguments' keys.
+    Supports multiple tool calls in a single response.
+    """
+    tool_calls = []

-    # Pattern 1: code fence blocks (```json, ```, ```JSON, etc.)
-    # Match any code fence that might contain a tool_call
-    fence_match = re.search(r'```\w*\s*(.*?)\s*```', content, re.DOTALL)
-    if fence_match:
-        block_text = fence_match.group(1)
+    def _extract_all_json_objects(text: str, start_key: str) -> list[dict]:
+        """Extract ALL JSON objects containing start_key using brace counting."""
+        results = []
+        search_start = 0
+        while True:
+            idx = text.find(start_key, search_start)
+            if idx == -1:
+                break
+            # Walk backwards to find the opening { of this object
+            depth = 0
+            obj_start = -1
+            for i in range(idx, -1, -1):
+                if text[i] == '}':
+                    depth += 1
+                elif text[i] == '{':
+                    if depth == 0:
+                        obj_start = i
+                        break
+                    depth -= 1
+            if obj_start == -1:
+                break
+            # Walk forwards to find the matching closing }
+            depth = 0
+            obj_end = -1
+            for i in range(obj_start, len(text)):
+                if text[i] == '{':
+                    depth += 1
+                elif text[i] == '}':
+                    depth -= 1
+                    if depth == 0:
+                        obj_end = i + 1
+                        break
+            if obj_end == -1:
+                break
+            try:
+                obj = json.loads(text[obj_start:obj_end])
+                if obj and isinstance(obj, dict):
+                    results.append(obj)
+            except json.JSONDecodeError:
+                pass
+            # Move past this object to find the next one
+            search_start = obj_end
+        return results
+
+    # Pattern 1: code fence blocks containing tool_call
+    fence_matches = re.findall(r'```\w*\s*(.*?)\s*```', content, re.DOTALL)
+    for block_text in fence_matches:
        if '"tool_call"' in block_text:
-            data = _extract_json_object(block_text, '"tool_call"')
-            if data and "tool_call" in data:
-                return data.get("tool_call")
+            objects = _extract_all_json_objects(block_text, '"tool_call"')
+            for obj in objects:
+                if "tool_call" in obj:
+                    tc = obj["tool_call"]
+                    if isinstance(tc, dict) and "name" in tc:
+                        tool_calls.append(tc)

-    # Pattern 2: {"tool_call": {...}} anywhere in response (bare JSON)
-    if '"tool_call"' in content:
-        data = _extract_json_object(content, '"tool_call"')
-        if data and "tool_call" in data:
-            return data.get("tool_call")
+    # Pattern 2: bare JSON {"tool_call": {...}} outside code fences
+    # Strip code fences first to avoid double-parsing
+    stripped = re.sub(r'```\w*\s*.*?\s*```', '', content, flags=re.DOTALL)
+    if '"tool_call"' in stripped:
+        objects = _extract_all_json_objects(stripped, '"tool_call"')
+        for obj in objects:
+            if "tool_call" in obj:
+                tc = obj["tool_call"]
+                if isinstance(tc, dict) and "name" in tc:
+                    # Avoid duplicates
+                    if not any(
+                        existing.get("name") == tc.get("name") and
+                        existing.get("arguments") == tc.get("arguments")
+                        for existing in tool_calls
+                    ):
+                        tool_calls.append(tc)

-    # Pattern 3: Look for tool name pattern like [USE: tool_name args]
-    bracket_match = re.search(r'\[USE:\s*(\w+)\s*(?:args:\s*(\{.*?\}))?\s*\]', content, re.DOTALL)
-    if bracket_match:
-        name = bracket_match.group(1)
-        args_str = bracket_match.group(2) or "{}"
+    # Pattern 3: [USE: tool_name args] pattern
+    bracket_matches = re.findall(r'\[USE:\s*(\w+)\s*(?:args:\s*(\{.*?\}))?\s*\]', content, re.DOTALL)
+    for match in bracket_matches:
+        name = match[0]
+        args_str = match[1] or "{}"
        try:
            args = json.loads(args_str)
        except json.JSONDecodeError:
            args = {}
-        return {"name": name, "arguments": args}
+        tool_calls.append({"name": name, "arguments": args})

-    return None
+    return tool_calls


 async def generate_response(
@ -821,7 +832,11 @@ async def generate_response(
    temperature: float = 0.7,
    max_tokens: int = 4096,
 ) -> str:
-    """Generate response using upstream LLM via OpenRouter with context-based tool calling."""
+    """Generate response using upstream LLM via OpenRouter with native tool calling.
+
+    Uses OpenAI-compatible `tools` parameter for reliable tool calling.
+    Falls back to content-based parsing if the model doesn't support native tools.
+    """
    if not state.llm_client:
        # Mock response for testing
        user_msg = ""
@ -838,91 +853,168 @@ async def generate_response(
            if m.content:
                messages_dict.append({"role": m.role, "content": m.content})

-        # Tool calling loop - NO tools passed to API, tools are in system prompt
+        # Prepare native tool schemas for OpenAI API
+        native_tools = None
+        if state.tool_manager and config.ENABLE_TOOLS:
+            schemas = state.tool_manager.get_all_schemas()
+            if schemas:
+                native_tools = []
+                for schema in schemas:
+                    if isinstance(schema, dict):
+                        # Ensure correct OpenAI tools format
+                        if schema.get("type") == "function" and "function" in schema:
+                            native_tools.append(schema)
+                        else:
+                            # Wrap bare function schema
+                            native_tools.append({
+                                "type": "function",
+                                "function": schema,
+                            })
+                    else:
+                        log.warning(f"Skipping non-dict tool schema: {schema}")
+
+        if native_tools:
+            log.info(f"Passing {len(native_tools)} tools to LLM API")
+        else:
+            log.info("No native tools available, using content-only mode")
+
+        # Tool calling loop
        max_iterations = config.MAX_TOOL_ITERATIONS
        iteration = 0
-        tool_results = []

        while iteration < max_iterations:
            iteration += 1
            log.info(f"LLM call iteration {iteration}")

-            # Call LLM WITHOUT tools parameter - tools are in system prompt
-            response = await state.llm_client.chat.completions.create(
-                model=config.UPSTREAM_MODEL,
-                messages=messages_dict,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                # NO tools parameter - using context-based approach
-            )
+            # Build API call parameters
+            api_params = {
+                "model": config.UPSTREAM_MODEL,
+                "messages": messages_dict,
+                "temperature": temperature,
+                "max_tokens": max_tokens,
+            }
+            if native_tools:
+                api_params["tools"] = native_tools
+                api_params["tool_choice"] = "auto"

-            log.info(f"LLM response received")
+            # Call LLM (with retry without tool_choice if model doesn't support it)
+            try:
+                response = await state.llm_client.chat.completions.create(**api_params)
+            except Exception as api_err:
+                err_str = str(api_err).lower()
+                if "tool_choice" in err_str and native_tools:
+                    log.warning(f"Model doesn't support tool_choice, retrying without it: {api_err}")
+                    del api_params["tool_choice"]
+                    response = await state.llm_client.chat.completions.create(**api_params)
+                else:
+                    raise

            if not response.choices:
                log.warning("No choices in response")
                return "I apologize, but I couldn't generate a response."

-            message = response.choices[0].message
+            choice = response.choices[0]
+            message = choice.message
            content = message.content or ""
+            finish_reason = choice.finish_reason or "stop"

-            log.info(f"Message content length: {len(content)}")
+            log.info(f"LLM response: content_len={len(content)}, finish_reason={finish_reason}")

-            # Check if response contains a tool call
-            tool_call = _parse_tool_call(content)
+            # --- Handle native tool calls (preferred path) ---
+            native_tool_calls = getattr(message, 'tool_calls', None)

-            if tool_call:
-                tool_name = tool_call.get("name")
-                tool_args = tool_call.get("arguments", {})
+            if native_tool_calls:
+                log.info(f"Native tool calls detected: {len(native_tool_calls)}")

-                if state.tool_manager:
-                    log.info(f"Parsed tool call: {tool_name}")
+                # Build assistant message with tool_calls for conversation history
+                assistant_msg = {
+                    "role": "assistant",
+                    "content": content if content else None,
+                    "tool_calls": [
+                        {
+                            "id": tc.id,
+                            "type": "function",
+                            "function": {
+                                "name": tc.function.name,
+                                "arguments": tc.function.arguments or "{}",
+                            },
+                        }
+                        for tc in native_tool_calls
+                    ],
+                }
+                messages_dict.append(assistant_msg)

-                    # Execute the tool (run in thread pool to avoid blocking the event loop)
-                    if isinstance(tool_args, dict):
+                # Execute each tool and add result messages
+                for tc in native_tool_calls:
+                    tool_name = tc.function.name
+                    try:
+                        tool_args = json.loads(tc.function.arguments or "{}")
+                    except json.JSONDecodeError:
+                        log.warning(f"Failed to parse tool arguments for {tool_name}: {tc.function.arguments}")
+                        tool_args = {}
+
+                    log.info(f"Executing native tool: {tool_name} with args: {tool_args}")
+
+                    if state.tool_manager:
                        result = await asyncio.to_thread(
                            state.tool_manager.execute_tool, tool_name, tool_args
                        )
                    else:
-                        result = await asyncio.to_thread(
-                            state.tool_manager.execute_tool_from_json, tool_name, json.dumps(tool_args)
-                        )
+                        result = {"success": False, "error": "No tool manager available"}

                    log.info(f"Tool {tool_name} result: success={result.get('success', False)}")

-                    # Store tool result
-                    tool_results.append({
-                        "name": tool_name,
-                        "result": result,
-                    })
-                    
-                    # Rebuild system message with tool results
-                    # Find and update the system message
-                    for i, msg in enumerate(messages_dict):
-                        if msg["role"] == "system":
-                            tool_result_text = f"\n\n--- TOOL RESULT ---\nTool: {tool_name}\nResult: {json.dumps(result, indent=2)}\n\nNow provide a helpful response based on this data."
-                            messages_dict[i]["content"] += tool_result_text
-                            break
-                    
-                    # Add assistant's tool call as a message
+                    # Add tool result using proper 'tool' role
                    messages_dict.append({
-                        "role": "assistant",
-                        "content": f"[Executing tool: {tool_name}]"
+                        "role": "tool",
+                        "tool_call_id": tc.id,
+                        "content": json.dumps(result),
                    })

-                    # Add user prompt to continue
+                continue
+
+            # --- Fallback: parse tool calls from content (for models without native tool support) ---
+            content_tool_calls = _parse_tool_calls(content)
+
+            if content_tool_calls:
+                log.info(f"Content-based tool calls detected: {len(content_tool_calls)}")
+
+                # Add the assistant's raw response to conversation
+                messages_dict.append({"role": "assistant", "content": content})
+
+                for tool_call in content_tool_calls:
+                    tool_name = tool_call.get("name")
+                    tool_args = tool_call.get("arguments", {})
+
+                    if not isinstance(tool_args, dict):
+                        try:
+                            tool_args = json.loads(tool_args)
+                        except (json.JSONDecodeError, TypeError):
+                            tool_args = {}
+
+                    log.info(f"Executing content-based tool: {tool_name}")
+
+                    if state.tool_manager:
+                        result = await asyncio.to_thread(
+                            state.tool_manager.execute_tool, tool_name, tool_args
+                        )
+                    else:
+                        result = {"success": False, "error": "No tool manager available"}
+
+                    log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
+
+                    # Feed result back as a user message
                    messages_dict.append({
                        "role": "user",
-                        "content": f"The tool {tool_name} returned the above result. Please provide your response to the original question using this data."
+                        "content": f"--- TOOL RESULT ---\nTool: {tool_name}\nResult: {json.dumps(result, indent=2)}\n\nNow provide a helpful response based on this data.",
                    })

-                    continue
-                else:
-                    log.warning(f"Tool call detected ({tool_name}) but tool_manager is None! Stripping tool call from response.")
+                continue

-            # No tool call found (or tool_manager unavailable) - return the response
-            # ALWAYS run cleanup to strip any residual tool_call JSON from response
+            # --- No tool calls - return the final response ---
+            # Light cleanup: only strip code-fence-wrapped tool_call blocks
            cleaned_content = _clean_tool_syntax(content)
-            log.info(f"Returning final response (cleaned={len(cleaned_content) != len(content)})")
+            log.info(f"Returning final response (len={len(cleaned_content)}, cleaned={len(cleaned_content) != len(content)})")
            return cleaned_content or "I apologize, but I couldn't generate a response."

        # Max iterations reached
@ -937,43 +1029,11 @@ async def generate_response(


 def _clean_tool_syntax(content: str) -> str:
-    """Remove tool call syntax from response if partially included."""
-    import re
-    
-    def _remove_json_containing_key(text: str, key: str) -> str:
-        """Remove JSON objects containing a specific key from text."""
-        result = text
-        while key in result:
-            idx = result.find(key)
-            # Walk backwards to find opening {
-            depth = 0
-            obj_start = -1
-            for i in range(idx, -1, -1):
-                if result[i] == '}':
-                    depth += 1
-                elif result[i] == '{':
-                    if depth == 0:
-                        obj_start = i
-                        break
-                    depth -= 1
-            if obj_start == -1:
-                break
-            # Walk forwards to find matching }
-            depth = 0
-            obj_end = -1
-            for i in range(obj_start, len(result)):
-                if result[i] == '{':
-                    depth += 1
-                elif result[i] == '}':
-                    depth -= 1
-                    if depth == 0:
-                        obj_end = i + 1
-                        break
-            if obj_end == -1:
-                break
-            result = result[:obj_start] + result[obj_end:]
-        return result
+    """Remove tool call syntax from response if partially included.

+    Only strips code-fence-wrapped blocks containing tool_call.
+    Does NOT strip bare JSON to avoid accidentally removing valid content.
+    """
    # Remove ```json ... ``` blocks containing tool_call
    def remove_code_block(m):
        block = m.group(0)
@ -982,8 +1042,7 @@ def _clean_tool_syntax(content: str) -> str:
            return ''
        return block

-    cleaned = re.sub(r'```json\s*(.*?)\s*```', remove_code_block, content, flags=re.DOTALL)
-    cleaned = _remove_json_containing_key(cleaned, '"tool_call"')
+    cleaned = re.sub(r'```\w*\s*(.*?)\s*```', remove_code_block, content, flags=re.DOTALL)
    return cleaned.strip()