Add detailed logging for debugging tool-calling issues

- Log full LLM response object
- Log message content and tool calls
- Log request start/end with request_id
- Add traceback logging for errors
2026-03-29 16:25:44 +00:00 · 2026-03-29 16:25:44 +00:00 · d966f8ea5d
commit d966f8ea5d
parent b811162f78
1 changed files with 20 additions and 2 deletions
--- a/main.py
+++ b/main.py
@ -418,6 +418,7 @@ async def download_website_if_needed(user_message: str) -> dict[str, Any]:

 async def complete_chat(request: ChatCompletionRequest, request_id: str) -> ChatCompletionResponse:
    """Process a non-streaming chat completion request."""
+    log.info(f"=== Starting complete_chat for request {request_id} ===")
    messages = request.messages

    # Extract the last user message
@ -430,6 +431,8 @@ async def complete_chat(request: ChatCompletionRequest, request_id: str) -> Chat
    if not user_message:
        raise HTTPException(status_code=400, detail="No user message found")

+    log.info(f"User message: {user_message[:100]}...")
+
    # Step 1: Download website if user is asking about one (BEFORE RAG retrieval)
    download_info = await download_website_if_needed(user_message)
    if download_info.get("downloaded"):
@ -454,11 +457,13 @@ async def complete_chat(request: ChatCompletionRequest, request_id: str) -> Chat
    enhanced_messages = build_enhanced_messages(messages, context, sources, download_info)

    # Step 4: Generate response with upstream LLM
+    log.info(f"Calling generate_response for request {request_id}")
    response_content = await generate_response(
        enhanced_messages,
        temperature=request.temperature,
        max_tokens=request.max_tokens,
    )
+    log.info(f"=== Completed complete_chat for request {request_id} ===")

    # Step 5: Build and return response
    return ChatCompletionResponse(
@ -720,13 +725,18 @@ async def generate_response(
                tool_choice="auto" if tools else None,
            )

+            log.info(f"LLM response received: {response}")
+
            if not response.choices:
+                log.warning("No choices in response")
                return "I apologize, but I couldn't generate a response."

            message = response.choices[0].message
+            log.info(f"Message content: {message.content[:200] if message.content else 'None'}...")
+            log.info(f"Tool calls: {message.tool_calls}")

            # Check if LLM wants to call tools
-            if message.tool_calls:
+            if message.tool_calls and len(message.tool_calls) > 0:
                log.info(f"LLM requested {len(message.tool_calls)} tool calls")

                # Add assistant message with tool calls to history
@ -773,13 +783,21 @@ async def generate_response(
                continue

            # No tool calls - return the final response
-            return message.content or "I apologize, but I couldn't generate a response."
+            if message.content:
+                log.info(f"Returning final response: {message.content[:100]}...")
+                return message.content
+            else:
+                log.warning("No content in message, returning default")
+                return "I apologize, but I couldn't generate a response."

        # Max iterations reached
+        log.warning(f"Max iterations ({max_iterations}) reached")
        return "I reached the maximum number of tool calls. Please try a more specific question."

    except Exception as e:
        log.error(f"OpenRouter LLM call failed: {e}")
+        import traceback
+        log.error(traceback.format_exc())
        return f"I encountered an error: {str(e)}"