Switch to mega-tool-call approach for unlimited tool calls

The upstream LLM only supports 2 native tool calls per response, but
the user needs to fire many tools at once. Solution: a content-based
'mega tool call', in which the LLM bundles ALL tool calls into a single
JSON array in its response text.

Key changes:
- System prompt: tells the LLM to output a {"tool_calls": [...]} JSON
  object with ALL needed tools in one block (no native tools param)
- _parse_tool_calls: parses the tool_calls array format (with legacy
  tool_call single-object fallback)
- generate_response: NO tools/tool_choice params to API, pure
  content-based parsing
- generate_response: executes ALL tools concurrently via asyncio.gather
- generate_response: feeds ALL results back in one consolidated message
- _clean_tool_syntax: strips code-fenced blocks containing tool_calls or
  tool_call (bare JSON is left untouched)
This commit is contained in:
Z User 2026-03-29 18:06:39 +00:00
parent 57228625fc
commit a2285d3a48

373
main.py
View File

@ -670,16 +670,29 @@ def build_enhanced_messages(
tool_descriptions = _build_tool_descriptions() tool_descriptions = _build_tool_descriptions()
# Add system message with RAG context and tool instructions # Add system message with RAG context and tool instructions
system_content = """You are a helpful AI assistant with access to real-time data through various tools. system_content = f"""You are a helpful AI assistant with access to real-time data through various tools.
## AVAILABLE TOOLS ## AVAILABLE TOOLS
You have access to tools for getting real-time data. Use them whenever you need current information. {tool_descriptions}
## HOW TO USE TOOLS
When you need to use one or more tools, output a SINGLE JSON block containing ALL tool calls as an array.
You MUST bundle every tool call into one response - do NOT respond with just one tool at a time.
Output EXACTLY this format (nothing else before or after):
```json
{{"tool_calls": [
{{"name": "tool_name", "arguments": {{"arg1": "value1"}}}},
{{"name": "another_tool", "arguments": {{"arg2": "value2"}}}}
]}}
```
## IMPORTANT RULES ## IMPORTANT RULES
1. ALWAYS use your available tools to get CURRENT data - do NOT say you cannot access real-time data 1. ALWAYS use tools to get CURRENT data - do NOT say you cannot access real-time data
2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool 2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool(s)
3. After receiving tool results, provide a helpful, natural-language response based on the data 3. Bundle ALL needed tool calls into a single `tool_calls` array - include every tool you need in one response
4. Be concise and factual - report exact data from tools 4. After receiving tool results, provide a helpful, natural-language response based on the data
5. Be concise and factual - report exact data from tools
""" """
if download_info and download_info.get("downloaded"): if download_info and download_info.get("downloaded"):
@ -734,95 +747,84 @@ def _build_tool_descriptions() -> str:
def _parse_tool_calls(content: str) -> list[dict]: def _parse_tool_calls(content: str) -> list[dict]:
"""Parse tool calls from LLM response content (fallback for models without native tool support). """Parse tool calls from LLM response content.
Expects the LLM to output a JSON block like:
```json
{"tool_calls": [{"name": "tool_name", "arguments": {...}}, ...]}
```
Returns a list of tool call dicts, each with 'name' and 'arguments' keys. Returns a list of tool call dicts, each with 'name' and 'arguments' keys.
Supports multiple tool calls in a single response.
""" """
tool_calls = [] tool_calls = []
def _extract_all_json_objects(text: str, start_key: str) -> list[dict]: def _extract_json_object(text: str, start_key: str) -> Optional[dict]:
"""Extract ALL JSON objects containing start_key using brace counting.""" """Extract a JSON object containing start_key using brace counting."""
results = [] idx = text.find(start_key)
search_start = 0 if idx == -1:
while True: return None
idx = text.find(start_key, search_start) # Walk backwards to find the opening {
if idx == -1: depth = 0
break obj_start = -1
# Walk backwards to find the opening { of this object for i in range(idx, -1, -1):
depth = 0 if text[i] == '}':
obj_start = -1 depth += 1
for i in range(idx, -1, -1): elif text[i] == '{':
if text[i] == '}': if depth == 0:
depth += 1 obj_start = i
elif text[i] == '{': break
if depth == 0: depth -= 1
obj_start = i if obj_start == -1:
break return None
depth -= 1 # Walk forwards to find the matching closing }
if obj_start == -1: depth = 0
break obj_end = -1
# Walk forwards to find the matching closing } for i in range(obj_start, len(text)):
depth = 0 if text[i] == '{':
obj_end = -1 depth += 1
for i in range(obj_start, len(text)): elif text[i] == '}':
if text[i] == '{': depth -= 1
depth += 1 if depth == 0:
elif text[i] == '}': obj_end = i + 1
depth -= 1 break
if depth == 0: if obj_end == -1:
obj_end = i + 1 return None
break try:
if obj_end == -1: return json.loads(text[obj_start:obj_end])
break except json.JSONDecodeError:
try: return None
obj = json.loads(text[obj_start:obj_end])
if obj and isinstance(obj, dict):
results.append(obj)
except json.JSONDecodeError:
pass
# Move past this object to find the next one
search_start = obj_end
return results
# Pattern 1: code fence blocks containing tool_call # --- Pattern 1: {"tool_calls": [...]} in a code fence block ---
fence_matches = re.findall(r'```\w*\s*(.*?)\s*```', content, re.DOTALL) fence_matches = re.findall(r'```\w*\s*(.*?)\s*```', content, re.DOTALL)
for block_text in fence_matches: for block_text in fence_matches:
if '"tool_call"' in block_text: obj = _extract_json_object(block_text, '"tool_calls"')
objects = _extract_all_json_objects(block_text, '"tool_call"') if obj and "tool_calls" in obj and isinstance(obj["tool_calls"], list):
for obj in objects: for tc in obj["tool_calls"]:
if "tool_call" in obj:
tc = obj["tool_call"]
if isinstance(tc, dict) and "name" in tc:
tool_calls.append(tc)
# Pattern 2: bare JSON {"tool_call": {...}} outside code fences
# Strip code fences first to avoid double-parsing
stripped = re.sub(r'```\w*\s*.*?\s*```', '', content, flags=re.DOTALL)
if '"tool_call"' in stripped:
objects = _extract_all_json_objects(stripped, '"tool_call"')
for obj in objects:
if "tool_call" in obj:
tc = obj["tool_call"]
if isinstance(tc, dict) and "name" in tc: if isinstance(tc, dict) and "name" in tc:
# Avoid duplicates tool_calls.append(tc)
if not any( if tool_calls:
existing.get("name") == tc.get("name") and return tool_calls
existing.get("arguments") == tc.get("arguments")
for existing in tool_calls
):
tool_calls.append(tc)
# Pattern 3: [USE: tool_name args] pattern # --- Pattern 2: {"tool_calls": [...]} bare JSON (outside code fences) ---
bracket_matches = re.findall(r'\[USE:\s*(\w+)\s*(?:args:\s*(\{.*?\}))?\s*\]', content, re.DOTALL) stripped = re.sub(r'```\w*\s*.*?\s*```', '', content, flags=re.DOTALL)
for match in bracket_matches: obj = _extract_json_object(stripped, '"tool_calls"')
name = match[0] if obj and "tool_calls" in obj and isinstance(obj["tool_calls"], list):
args_str = match[1] or "{}" for tc in obj["tool_calls"]:
try: if isinstance(tc, dict) and "name" in tc:
args = json.loads(args_str) tool_calls.append(tc)
except json.JSONDecodeError: if tool_calls:
args = {} return tool_calls
tool_calls.append({"name": name, "arguments": args})
# --- Pattern 3 (legacy fallback): {"tool_call": {...}} single tool ---
# Also support the old format in case the LLM ignores instructions
for block_text in fence_matches:
obj = _extract_json_object(block_text, '"tool_call"')
if obj and "tool_call" in obj and isinstance(obj["tool_call"], dict) and "name" in obj["tool_call"]:
tool_calls.append(obj["tool_call"])
if not tool_calls:
obj = _extract_json_object(stripped, '"tool_call"')
if obj and "tool_call" in obj and isinstance(obj["tool_call"], dict) and "name" in obj["tool_call"]:
tool_calls.append(obj["tool_call"])
return tool_calls return tool_calls
@ -832,10 +834,11 @@ async def generate_response(
temperature: float = 0.7, temperature: float = 0.7,
max_tokens: int = 4096, max_tokens: int = 4096,
) -> str: ) -> str:
"""Generate response using upstream LLM via OpenRouter with native tool calling. """Generate response using upstream LLM via OpenRouter.
Uses OpenAI-compatible `tools` parameter for reliable tool calling. Uses content-based tool calling: the LLM outputs a single JSON block with
Falls back to content-based parsing if the model doesn't support native tools. all tool calls bundled as a `tool_calls` array. This works around model
limitations on the number of native tool calls per response.
""" """
if not state.llm_client: if not state.llm_client:
# Mock response for testing # Mock response for testing
@ -853,32 +856,7 @@ async def generate_response(
if m.content: if m.content:
messages_dict.append({"role": m.role, "content": m.content}) messages_dict.append({"role": m.role, "content": m.content})
# Prepare native tool schemas for OpenAI API # Tool calling loop (content-based approach — no `tools` param to API)
native_tools = None
if state.tool_manager and config.ENABLE_TOOLS:
schemas = state.tool_manager.get_all_schemas()
if schemas:
native_tools = []
for schema in schemas:
if isinstance(schema, dict):
# Ensure correct OpenAI tools format
if schema.get("type") == "function" and "function" in schema:
native_tools.append(schema)
else:
# Wrap bare function schema
native_tools.append({
"type": "function",
"function": schema,
})
else:
log.warning(f"Skipping non-dict tool schema: {schema}")
if native_tools:
log.info(f"Passing {len(native_tools)} tools to LLM API")
else:
log.info("No native tools available, using content-only mode")
# Tool calling loop
max_iterations = config.MAX_TOOL_ITERATIONS max_iterations = config.MAX_TOOL_ITERATIONS
iteration = 0 iteration = 0
@ -886,140 +864,78 @@ async def generate_response(
iteration += 1 iteration += 1
log.info(f"LLM call iteration {iteration}") log.info(f"LLM call iteration {iteration}")
# Build API call parameters # Call LLM WITHOUT tools parameter — tool instructions are in the system prompt
api_params = { response = await state.llm_client.chat.completions.create(
"model": config.UPSTREAM_MODEL, model=config.UPSTREAM_MODEL,
"messages": messages_dict, messages=messages_dict,
"temperature": temperature, temperature=temperature,
"max_tokens": max_tokens, max_tokens=max_tokens,
} )
if native_tools:
api_params["tools"] = native_tools
api_params["tool_choice"] = "auto"
# Call LLM (with retry without tool_choice if model doesn't support it)
try:
response = await state.llm_client.chat.completions.create(**api_params)
except Exception as api_err:
err_str = str(api_err).lower()
if "tool_choice" in err_str and native_tools:
log.warning(f"Model doesn't support tool_choice, retrying without it: {api_err}")
del api_params["tool_choice"]
response = await state.llm_client.chat.completions.create(**api_params)
else:
raise
if not response.choices: if not response.choices:
log.warning("No choices in response") log.warning("No choices in response")
return "I apologize, but I couldn't generate a response." return "I apologize, but I couldn't generate a response."
choice = response.choices[0] content = response.choices[0].message.content or ""
message = choice.message log.info(f"LLM response: content_len={len(content)}")
content = message.content or ""
finish_reason = choice.finish_reason or "stop"
log.info(f"LLM response: content_len={len(content)}, finish_reason={finish_reason}") # --- Parse tool calls from content ---
tool_calls = _parse_tool_calls(content)
# --- Handle native tool calls (preferred path) --- if tool_calls:
native_tool_calls = getattr(message, 'tool_calls', None) log.info(f"Parsed {len(tool_calls)} tool calls from content")
if native_tool_calls: # Execute ALL tools concurrently
log.info(f"Native tool calls detected: {len(native_tool_calls)}") if state.tool_manager:
import asyncio as _asyncio
# Build assistant message with tool_calls for conversation history async def _run_tool(tc):
assistant_msg = { name = tc.get("name")
"role": "assistant", args = tc.get("arguments", {})
"content": content if content else None, if not isinstance(args, dict):
"tool_calls": [ try:
{ args = json.loads(args)
"id": tc.id, except (json.JSONDecodeError, TypeError):
"type": "function", args = {}
"function": { result = await _asyncio.to_thread(
"name": tc.function.name, state.tool_manager.execute_tool, name, args
"arguments": tc.function.arguments or "{}",
},
}
for tc in native_tool_calls
],
}
messages_dict.append(assistant_msg)
# Execute each tool and add result messages
for tc in native_tool_calls:
tool_name = tc.function.name
try:
tool_args = json.loads(tc.function.arguments or "{}")
except json.JSONDecodeError:
log.warning(f"Failed to parse tool arguments for {tool_name}: {tc.function.arguments}")
tool_args = {}
log.info(f"Executing native tool: {tool_name} with args: {tool_args}")
if state.tool_manager:
result = await asyncio.to_thread(
state.tool_manager.execute_tool, tool_name, tool_args
) )
else: return name, result
result = {"success": False, "error": "No tool manager available"}
log.info(f"Tool {tool_name} result: success={result.get('success', False)}") results = await _asyncio.gather(*[_run_tool(tc) for tc in tool_calls])
# Add tool result using proper 'tool' role # Build a single consolidated results block
messages_dict.append({ results_text = ""
"role": "tool", for name, result in results:
"tool_call_id": tc.id, log.info(f"Tool {name} result: success={result.get('success', False)}")
"content": json.dumps(result), results_text += f"\n### Tool: {name}\n{json.dumps(result, indent=2)}\n"
})
continue # Append assistant's tool call message to conversation
messages_dict.append({"role": "assistant", "content": content})
# --- Fallback: parse tool calls from content (for models without native tool support) --- # Feed ALL results back in one user message
content_tool_calls = _parse_tool_calls(content)
if content_tool_calls:
log.info(f"Content-based tool calls detected: {len(content_tool_calls)}")
# Add the assistant's raw response to conversation
messages_dict.append({"role": "assistant", "content": content})
for tool_call in content_tool_calls:
tool_name = tool_call.get("name")
tool_args = tool_call.get("arguments", {})
if not isinstance(tool_args, dict):
try:
tool_args = json.loads(tool_args)
except (json.JSONDecodeError, TypeError):
tool_args = {}
log.info(f"Executing content-based tool: {tool_name}")
if state.tool_manager:
result = await asyncio.to_thread(
state.tool_manager.execute_tool, tool_name, tool_args
)
else:
result = {"success": False, "error": "No tool manager available"}
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
# Feed result back as a user message
messages_dict.append({ messages_dict.append({
"role": "user", "role": "user",
"content": f"--- TOOL RESULT ---\nTool: {tool_name}\nResult: {json.dumps(result, indent=2)}\n\nNow provide a helpful response based on this data.", "content": (
f"--- ALL TOOL RESULTS ---\n"
f"Executed {len(tool_calls)} tool(s). Results:\n{results_text}\n"
f"---\n\n"
f"Now provide a helpful response to the original question using ALL the data above."
),
}) })
continue continue
else:
log.warning("Tool call detected but tool_manager is None")
# --- No tool calls - return the final response --- # --- No tool calls — return the final response ---
# Light cleanup: only strip code-fence-wrapped tool_call blocks
cleaned_content = _clean_tool_syntax(content) cleaned_content = _clean_tool_syntax(content)
log.info(f"Returning final response (len={len(cleaned_content)}, cleaned={len(cleaned_content) != len(content)})") log.info(f"Returning final response (len={len(cleaned_content)})")
return cleaned_content or "I apologize, but I couldn't generate a response." return cleaned_content or "I apologize, but I couldn't generate a response."
# Max iterations reached # Max iterations reached
log.warning(f"Max iterations ({max_iterations}) reached") log.warning(f"Max iterations ({max_iterations}) reached")
return "I reached the maximum number of tool calls. Please try a more specific question." return "I reached the maximum number of tool call rounds. Please try a more specific question."
except Exception as e: except Exception as e:
log.error(f"OpenRouter LLM call failed: {e}") log.error(f"OpenRouter LLM call failed: {e}")
@ -1029,16 +945,15 @@ async def generate_response(
def _clean_tool_syntax(content: str) -> str: def _clean_tool_syntax(content: str) -> str:
"""Remove tool call syntax from response if partially included. """Remove tool call JSON blocks from response text.
Only strips code-fence-wrapped blocks containing tool_call. Strips code-fence-wrapped blocks containing "tool_calls" or "tool_call".
Does NOT strip bare JSON to avoid accidentally removing valid content. Does NOT strip bare JSON to avoid removing valid content.
""" """
# Remove ```json ... ``` blocks containing tool_call
def remove_code_block(m): def remove_code_block(m):
block = m.group(0) block = m.group(0)
inner = m.group(1) inner = m.group(1)
if '"tool_call"' in inner: if '"tool_calls"' in inner or '"tool_call"' in inner:
return '' return ''
return block return block