From d966f8ea5d87afb4df7a70af7f3ea7466d6f924c Mon Sep 17 00:00:00 2001 From: Z User Date: Sun, 29 Mar 2026 16:25:44 +0000 Subject: [PATCH] Add detailed logging for debugging tool calling issues - Log full LLM response object - Log message content and tool calls - Log request start/end with request_id - Add traceback logging for errors --- main.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/main.py b/main.py index 6719197..31b86ee 100755 --- a/main.py +++ b/main.py @@ -418,6 +418,7 @@ async def download_website_if_needed(user_message: str) -> dict[str, Any]: async def complete_chat(request: ChatCompletionRequest, request_id: str) -> ChatCompletionResponse: """Process a non-streaming chat completion request.""" + log.info(f"=== Starting complete_chat for request {request_id} ===") messages = request.messages # Extract the last user message @@ -430,6 +431,8 @@ async def complete_chat(request: ChatCompletionRequest, request_id: str) -> Chat if not user_message: raise HTTPException(status_code=400, detail="No user message found") + log.info(f"User message: {user_message[:100]}...") + # Step 1: Download website if user is asking about one (BEFORE RAG retrieval) download_info = await download_website_if_needed(user_message) if download_info.get("downloaded"): @@ -454,11 +457,13 @@ async def complete_chat(request: ChatCompletionRequest, request_id: str) -> Chat enhanced_messages = build_enhanced_messages(messages, context, sources, download_info) # Step 4: Generate response with upstream LLM + log.info(f"Calling generate_response for request {request_id}") response_content = await generate_response( enhanced_messages, temperature=request.temperature, max_tokens=request.max_tokens, ) + log.info(f"=== Completed complete_chat for request {request_id} ===") # Step 5: Build and return response return ChatCompletionResponse( @@ -720,13 +725,18 @@ async def generate_response( tool_choice="auto" if tools else None, ) + log.info(f"LLM response received: {response}") + if not response.choices: + log.warning("No choices in response") return "I apologize, but I couldn't generate a response." message = response.choices[0].message + log.info(f"Message content: {message.content[:200] if message.content else 'None'}...") + log.info(f"Tool calls: {message.tool_calls}") # Check if LLM wants to call tools - if message.tool_calls: + if message.tool_calls and len(message.tool_calls) > 0: log.info(f"LLM requested {len(message.tool_calls)} tool calls") # Add assistant message with tool calls to history @@ -773,13 +783,21 @@ async def generate_response( continue # No tool calls - return the final response - return message.content or "I apologize, but I couldn't generate a response." + if message.content: + log.info(f"Returning final response: {message.content[:100]}...") + return message.content + else: + log.warning("No content in message, returning default") + return "I apologize, but I couldn't generate a response." # Max iterations reached + log.warning(f"Max iterations ({max_iterations}) reached") return "I reached the maximum number of tool calls. Please try a more specific question." except Exception as e: log.error(f"OpenRouter LLM call failed: {e}") + import traceback + log.error(traceback.format_exc()) return f"I encountered an error: {str(e)}"