Fix tool calling: switch to native OpenAI tools parameter

Problems fixed:
- 'Mega tool call': the LLM emitted multiple tool calls in one response
  and they got bundled into a single call. generate_response now uses the
  native OpenAI tools parameter, which returns each call as a separate
  entry in the message.tool_calls array.
- 'Returning nothing': _clean_tool_syntax was too aggressive and stripped
  the entire response. It now only strips code-fence-wrapped blocks that
  contain tool_call.
- Tool results were appended to the system message, growing it without
  bound; they are now added to the conversation history as proper 'tool'
  role messages.

Key changes:
- generate_response: passes tools/tool_choice to the OpenAI API (native
  tool calling), and retries once without tool_choice if the API call
  fails with an error that mentions tool_choice
- generate_response: handles multiple tool_calls per response natively
- generate_response: uses proper 'tool' role messages for native tool
  results instead of appending them to the system message (the
  content-based fallback feeds results back as 'user' messages)
- _parse_tool_calls (was _parse_tool_call): now returns a list (empty
  instead of None when nothing is found), supports multiple tool calls,
  and is used as a fallback for models without native tool support
- _clean_tool_syntax: much less aggressive; only strips code-fence
  blocks containing tool_call and no longer removes bare JSON (which was
  eating valid responses)
- System prompt: removed the JSON format instructions (native tool
  calling handles the format) and simplified the rules
This commit is contained in:
Z User 2026-03-29 17:57:26 +00:00
parent c03bde8023
commit 57228625fc

389
main.py
View File

@ -670,28 +670,16 @@ def build_enhanced_messages(
tool_descriptions = _build_tool_descriptions() tool_descriptions = _build_tool_descriptions()
# Add system message with RAG context and tool instructions # Add system message with RAG context and tool instructions
system_content = f"""You are a helpful AI assistant with access to real-time data through various tools. system_content = """You are a helpful AI assistant with access to real-time data through various tools.
## AVAILABLE TOOLS ## AVAILABLE TOOLS
{tool_descriptions} You have access to tools for getting real-time data. Use them whenever you need current information.
## HOW TO USE TOOLS
When you need to use a tool, output a JSON block in this EXACT format:
```json
{{"tool_call": {{"name": "tool_name", "arguments": {{"arg1": "value1"}}}}}}
```
For example, to get stock info for AAPL:
```json
{{"tool_call": {{"name": "finance_get_stock_info", "arguments": {{"symbol": "AAPL"}}}}}}
```
## IMPORTANT RULES ## IMPORTANT RULES
1. ALWAYS use tools to get CURRENT data - do NOT say you cannot access real-time data 1. ALWAYS use your available tools to get CURRENT data - do NOT say you cannot access real-time data
2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool 2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool
3. Output ONLY the JSON tool_call block when you need to use a tool 3. After receiving tool results, provide a helpful, natural-language response based on the data
4. After receiving tool results, provide a helpful response based on the data 4. Be concise and factual - report exact data from tools
5. Be concise and factual - report exact data from tools
""" """
if download_info and download_info.get("downloaded"): if download_info and download_info.get("downloaded"):
@ -745,75 +733,98 @@ def _build_tool_descriptions() -> str:
return "\n".join(descriptions) return "\n".join(descriptions)
def _parse_tool_call(content: str) -> Optional[dict]: def _parse_tool_calls(content: str) -> list[dict]:
"""Parse a tool call from LLM response content.""" """Parse tool calls from LLM response content (fallback for models without native tool support).
import re
def _extract_json_object(text: str, start_key: str) -> Optional[dict]: Returns a list of tool call dicts, each with 'name' and 'arguments' keys.
"""Extract a JSON object containing start_key using brace counting.""" Supports multiple tool calls in a single response.
# Find the start of the outermost object containing start_key """
idx = text.find(start_key) tool_calls = []
if idx == -1:
return None
# Walk backwards to find the opening { of this object
depth = 0
obj_start = -1
for i in range(idx, -1, -1):
if text[i] == '}':
depth += 1
elif text[i] == '{':
if depth == 0:
obj_start = i
break
depth -= 1
if obj_start == -1:
return None
# Walk forwards to find the matching closing }
depth = 0
obj_end = -1
for i in range(obj_start, len(text)):
if text[i] == '{':
depth += 1
elif text[i] == '}':
depth -= 1
if depth == 0:
obj_end = i + 1
break
if obj_end == -1:
return None
try:
return json.loads(text[obj_start:obj_end])
except json.JSONDecodeError:
return None
# Pattern 1: code fence blocks (```json, ```, ```JSON, etc.) def _extract_all_json_objects(text: str, start_key: str) -> list[dict]:
# Match any code fence that might contain a tool_call """Extract ALL JSON objects containing start_key using brace counting."""
fence_match = re.search(r'```\w*\s*(.*?)\s*```', content, re.DOTALL) results = []
if fence_match: search_start = 0
block_text = fence_match.group(1) while True:
idx = text.find(start_key, search_start)
if idx == -1:
break
# Walk backwards to find the opening { of this object
depth = 0
obj_start = -1
for i in range(idx, -1, -1):
if text[i] == '}':
depth += 1
elif text[i] == '{':
if depth == 0:
obj_start = i
break
depth -= 1
if obj_start == -1:
break
# Walk forwards to find the matching closing }
depth = 0
obj_end = -1
for i in range(obj_start, len(text)):
if text[i] == '{':
depth += 1
elif text[i] == '}':
depth -= 1
if depth == 0:
obj_end = i + 1
break
if obj_end == -1:
break
try:
obj = json.loads(text[obj_start:obj_end])
if obj and isinstance(obj, dict):
results.append(obj)
except json.JSONDecodeError:
pass
# Move past this object to find the next one
search_start = obj_end
return results
# Pattern 1: code fence blocks containing tool_call
fence_matches = re.findall(r'```\w*\s*(.*?)\s*```', content, re.DOTALL)
for block_text in fence_matches:
if '"tool_call"' in block_text: if '"tool_call"' in block_text:
data = _extract_json_object(block_text, '"tool_call"') objects = _extract_all_json_objects(block_text, '"tool_call"')
if data and "tool_call" in data: for obj in objects:
return data.get("tool_call") if "tool_call" in obj:
tc = obj["tool_call"]
if isinstance(tc, dict) and "name" in tc:
tool_calls.append(tc)
# Pattern 2: {"tool_call": {...}} anywhere in response (bare JSON) # Pattern 2: bare JSON {"tool_call": {...}} outside code fences
if '"tool_call"' in content: # Strip code fences first to avoid double-parsing
data = _extract_json_object(content, '"tool_call"') stripped = re.sub(r'```\w*\s*.*?\s*```', '', content, flags=re.DOTALL)
if data and "tool_call" in data: if '"tool_call"' in stripped:
return data.get("tool_call") objects = _extract_all_json_objects(stripped, '"tool_call"')
for obj in objects:
if "tool_call" in obj:
tc = obj["tool_call"]
if isinstance(tc, dict) and "name" in tc:
# Avoid duplicates
if not any(
existing.get("name") == tc.get("name") and
existing.get("arguments") == tc.get("arguments")
for existing in tool_calls
):
tool_calls.append(tc)
# Pattern 3: Look for tool name pattern like [USE: tool_name args] # Pattern 3: [USE: tool_name args] pattern
bracket_match = re.search(r'\[USE:\s*(\w+)\s*(?:args:\s*(\{.*?\}))?\s*\]', content, re.DOTALL) bracket_matches = re.findall(r'\[USE:\s*(\w+)\s*(?:args:\s*(\{.*?\}))?\s*\]', content, re.DOTALL)
if bracket_match: for match in bracket_matches:
name = bracket_match.group(1) name = match[0]
args_str = bracket_match.group(2) or "{}" args_str = match[1] or "{}"
try: try:
args = json.loads(args_str) args = json.loads(args_str)
except json.JSONDecodeError: except json.JSONDecodeError:
args = {} args = {}
return {"name": name, "arguments": args} tool_calls.append({"name": name, "arguments": args})
return None return tool_calls
async def generate_response( async def generate_response(
@ -821,7 +832,11 @@ async def generate_response(
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int = 4096, max_tokens: int = 4096,
) -> str: ) -> str:
"""Generate response using upstream LLM via OpenRouter with context-based tool calling.""" """Generate response using upstream LLM via OpenRouter with native tool calling.
Uses OpenAI-compatible `tools` parameter for reliable tool calling.
Falls back to content-based parsing if the model doesn't support native tools.
"""
if not state.llm_client: if not state.llm_client:
# Mock response for testing # Mock response for testing
user_msg = "" user_msg = ""
@ -838,91 +853,168 @@ async def generate_response(
if m.content: if m.content:
messages_dict.append({"role": m.role, "content": m.content}) messages_dict.append({"role": m.role, "content": m.content})
# Tool calling loop - NO tools passed to API, tools are in system prompt # Prepare native tool schemas for OpenAI API
native_tools = None
if state.tool_manager and config.ENABLE_TOOLS:
schemas = state.tool_manager.get_all_schemas()
if schemas:
native_tools = []
for schema in schemas:
if isinstance(schema, dict):
# Ensure correct OpenAI tools format
if schema.get("type") == "function" and "function" in schema:
native_tools.append(schema)
else:
# Wrap bare function schema
native_tools.append({
"type": "function",
"function": schema,
})
else:
log.warning(f"Skipping non-dict tool schema: {schema}")
if native_tools:
log.info(f"Passing {len(native_tools)} tools to LLM API")
else:
log.info("No native tools available, using content-only mode")
# Tool calling loop
max_iterations = config.MAX_TOOL_ITERATIONS max_iterations = config.MAX_TOOL_ITERATIONS
iteration = 0 iteration = 0
tool_results = []
while iteration < max_iterations: while iteration < max_iterations:
iteration += 1 iteration += 1
log.info(f"LLM call iteration {iteration}") log.info(f"LLM call iteration {iteration}")
# Call LLM WITHOUT tools parameter - tools are in system prompt # Build API call parameters
response = await state.llm_client.chat.completions.create( api_params = {
model=config.UPSTREAM_MODEL, "model": config.UPSTREAM_MODEL,
messages=messages_dict, "messages": messages_dict,
temperature=temperature, "temperature": temperature,
max_tokens=max_tokens, "max_tokens": max_tokens,
# NO tools parameter - using context-based approach }
) if native_tools:
api_params["tools"] = native_tools
api_params["tool_choice"] = "auto"
log.info(f"LLM response received") # Call LLM (with retry without tool_choice if model doesn't support it)
try:
response = await state.llm_client.chat.completions.create(**api_params)
except Exception as api_err:
err_str = str(api_err).lower()
if "tool_choice" in err_str and native_tools:
log.warning(f"Model doesn't support tool_choice, retrying without it: {api_err}")
del api_params["tool_choice"]
response = await state.llm_client.chat.completions.create(**api_params)
else:
raise
if not response.choices: if not response.choices:
log.warning("No choices in response") log.warning("No choices in response")
return "I apologize, but I couldn't generate a response." return "I apologize, but I couldn't generate a response."
message = response.choices[0].message choice = response.choices[0]
message = choice.message
content = message.content or "" content = message.content or ""
finish_reason = choice.finish_reason or "stop"
log.info(f"Message content length: {len(content)}") log.info(f"LLM response: content_len={len(content)}, finish_reason={finish_reason}")
# Check if response contains a tool call # --- Handle native tool calls (preferred path) ---
tool_call = _parse_tool_call(content) native_tool_calls = getattr(message, 'tool_calls', None)
if tool_call: if native_tool_calls:
tool_name = tool_call.get("name") log.info(f"Native tool calls detected: {len(native_tool_calls)}")
tool_args = tool_call.get("arguments", {})
if state.tool_manager: # Build assistant message with tool_calls for conversation history
log.info(f"Parsed tool call: {tool_name}") assistant_msg = {
"role": "assistant",
"content": content if content else None,
"tool_calls": [
{
"id": tc.id,
"type": "function",
"function": {
"name": tc.function.name,
"arguments": tc.function.arguments or "{}",
},
}
for tc in native_tool_calls
],
}
messages_dict.append(assistant_msg)
# Execute the tool (run in thread pool to avoid blocking the event loop) # Execute each tool and add result messages
if isinstance(tool_args, dict): for tc in native_tool_calls:
tool_name = tc.function.name
try:
tool_args = json.loads(tc.function.arguments or "{}")
except json.JSONDecodeError:
log.warning(f"Failed to parse tool arguments for {tool_name}: {tc.function.arguments}")
tool_args = {}
log.info(f"Executing native tool: {tool_name} with args: {tool_args}")
if state.tool_manager:
result = await asyncio.to_thread( result = await asyncio.to_thread(
state.tool_manager.execute_tool, tool_name, tool_args state.tool_manager.execute_tool, tool_name, tool_args
) )
else: else:
result = await asyncio.to_thread( result = {"success": False, "error": "No tool manager available"}
state.tool_manager.execute_tool_from_json, tool_name, json.dumps(tool_args)
)
log.info(f"Tool {tool_name} result: success={result.get('success', False)}") log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
# Store tool result # Add tool result using proper 'tool' role
tool_results.append({
"name": tool_name,
"result": result,
})
# Rebuild system message with tool results
# Find and update the system message
for i, msg in enumerate(messages_dict):
if msg["role"] == "system":
tool_result_text = f"\n\n--- TOOL RESULT ---\nTool: {tool_name}\nResult: {json.dumps(result, indent=2)}\n\nNow provide a helpful response based on this data."
messages_dict[i]["content"] += tool_result_text
break
# Add assistant's tool call as a message
messages_dict.append({ messages_dict.append({
"role": "assistant", "role": "tool",
"content": f"[Executing tool: {tool_name}]" "tool_call_id": tc.id,
"content": json.dumps(result),
}) })
# Add user prompt to continue continue
# --- Fallback: parse tool calls from content (for models without native tool support) ---
content_tool_calls = _parse_tool_calls(content)
if content_tool_calls:
log.info(f"Content-based tool calls detected: {len(content_tool_calls)}")
# Add the assistant's raw response to conversation
messages_dict.append({"role": "assistant", "content": content})
for tool_call in content_tool_calls:
tool_name = tool_call.get("name")
tool_args = tool_call.get("arguments", {})
if not isinstance(tool_args, dict):
try:
tool_args = json.loads(tool_args)
except (json.JSONDecodeError, TypeError):
tool_args = {}
log.info(f"Executing content-based tool: {tool_name}")
if state.tool_manager:
result = await asyncio.to_thread(
state.tool_manager.execute_tool, tool_name, tool_args
)
else:
result = {"success": False, "error": "No tool manager available"}
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
# Feed result back as a user message
messages_dict.append({ messages_dict.append({
"role": "user", "role": "user",
"content": f"The tool {tool_name} returned the above result. Please provide your response to the original question using this data." "content": f"--- TOOL RESULT ---\nTool: {tool_name}\nResult: {json.dumps(result, indent=2)}\n\nNow provide a helpful response based on this data.",
}) })
continue continue
else:
log.warning(f"Tool call detected ({tool_name}) but tool_manager is None! Stripping tool call from response.")
# No tool call found (or tool_manager unavailable) - return the response # --- No tool calls - return the final response ---
# ALWAYS run cleanup to strip any residual tool_call JSON from response # Light cleanup: only strip code-fence-wrapped tool_call blocks
cleaned_content = _clean_tool_syntax(content) cleaned_content = _clean_tool_syntax(content)
log.info(f"Returning final response (cleaned={len(cleaned_content) != len(content)})") log.info(f"Returning final response (len={len(cleaned_content)}, cleaned={len(cleaned_content) != len(content)})")
return cleaned_content or "I apologize, but I couldn't generate a response." return cleaned_content or "I apologize, but I couldn't generate a response."
# Max iterations reached # Max iterations reached
@ -937,43 +1029,11 @@ async def generate_response(
def _clean_tool_syntax(content: str) -> str: def _clean_tool_syntax(content: str) -> str:
"""Remove tool call syntax from response if partially included.""" """Remove tool call syntax from response if partially included.
import re
def _remove_json_containing_key(text: str, key: str) -> str:
"""Remove JSON objects containing a specific key from text."""
result = text
while key in result:
idx = result.find(key)
# Walk backwards to find opening {
depth = 0
obj_start = -1
for i in range(idx, -1, -1):
if result[i] == '}':
depth += 1
elif result[i] == '{':
if depth == 0:
obj_start = i
break
depth -= 1
if obj_start == -1:
break
# Walk forwards to find matching }
depth = 0
obj_end = -1
for i in range(obj_start, len(result)):
if result[i] == '{':
depth += 1
elif result[i] == '}':
depth -= 1
if depth == 0:
obj_end = i + 1
break
if obj_end == -1:
break
result = result[:obj_start] + result[obj_end:]
return result
Only strips code-fence-wrapped blocks containing tool_call.
Does NOT strip bare JSON to avoid accidentally removing valid content.
"""
# Remove ```json ... ``` blocks containing tool_call # Remove ```json ... ``` blocks containing tool_call
def remove_code_block(m): def remove_code_block(m):
block = m.group(0) block = m.group(0)
@ -982,8 +1042,7 @@ def _clean_tool_syntax(content: str) -> str:
return '' return ''
return block return block
cleaned = re.sub(r'```json\s*(.*?)\s*```', remove_code_block, content, flags=re.DOTALL) cleaned = re.sub(r'```\w*\s*(.*?)\s*```', remove_code_block, content, flags=re.DOTALL)
cleaned = _remove_json_containing_key(cleaned, '"tool_call"')
return cleaned.strip() return cleaned.strip()