Switch to mega-tool-call approach for unlimited tool calls
The upstream LLM supports only 2 native tool calls per response, but
the user needs to fire many tools at once. Solution: a content-based
'mega tool call', in which the LLM bundles ALL tool calls into a single
JSON array in its response text.
Key changes:
- System prompt: tells the LLM to output a single {"tool_calls": [...]} JSON
object listing ALL needed tools in one block (no native tools param)
- _parse_tool_calls: parses the tool_calls array format (with legacy
tool_call single-object fallback)
- generate_response: NO tools/tool_choice params to API, pure
content-based parsing
- generate_response: executes ALL tools concurrently via asyncio.gather
- generate_response: feeds ALL results back in one consolidated user message
- _clean_tool_syntax: strips code-fenced blocks containing tool_calls or tool_call (bare JSON is left untouched)
This commit is contained in:
parent
57228625fc
commit
a2285d3a48
373
main.py
373
main.py
@ -670,16 +670,29 @@ def build_enhanced_messages(
|
|||||||
tool_descriptions = _build_tool_descriptions()
|
tool_descriptions = _build_tool_descriptions()
|
||||||
|
|
||||||
# Add system message with RAG context and tool instructions
|
# Add system message with RAG context and tool instructions
|
||||||
system_content = """You are a helpful AI assistant with access to real-time data through various tools.
|
system_content = f"""You are a helpful AI assistant with access to real-time data through various tools.
|
||||||
|
|
||||||
## AVAILABLE TOOLS
|
## AVAILABLE TOOLS
|
||||||
You have access to tools for getting real-time data. Use them whenever you need current information.
|
{tool_descriptions}
|
||||||
|
|
||||||
|
## HOW TO USE TOOLS
|
||||||
|
When you need to use one or more tools, output a SINGLE JSON block containing ALL tool calls as an array.
|
||||||
|
You MUST bundle every tool call into one response - do NOT respond with just one tool at a time.
|
||||||
|
|
||||||
|
Output EXACTLY this format (nothing else before or after):
|
||||||
|
```json
|
||||||
|
{{"tool_calls": [
|
||||||
|
{{"name": "tool_name", "arguments": {{"arg1": "value1"}}}},
|
||||||
|
{{"name": "another_tool", "arguments": {{"arg2": "value2"}}}}
|
||||||
|
]}}
|
||||||
|
```
|
||||||
|
|
||||||
## IMPORTANT RULES
|
## IMPORTANT RULES
|
||||||
1. ALWAYS use your available tools to get CURRENT data - do NOT say you cannot access real-time data
|
1. ALWAYS use tools to get CURRENT data - do NOT say you cannot access real-time data
|
||||||
2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool
|
2. When asked about stocks, crypto, weather, or news, you MUST use the appropriate tool(s)
|
||||||
3. After receiving tool results, provide a helpful, natural-language response based on the data
|
3. Bundle ALL needed tool calls into a single `tool_calls` array - include every tool you need in one response
|
||||||
4. Be concise and factual - report exact data from tools
|
4. After receiving tool results, provide a helpful, natural-language response based on the data
|
||||||
|
5. Be concise and factual - report exact data from tools
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if download_info and download_info.get("downloaded"):
|
if download_info and download_info.get("downloaded"):
|
||||||
@ -734,95 +747,84 @@ def _build_tool_descriptions() -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _parse_tool_calls(content: str) -> list[dict]:
|
def _parse_tool_calls(content: str) -> list[dict]:
|
||||||
"""Parse tool calls from LLM response content (fallback for models without native tool support).
|
"""Parse tool calls from LLM response content.
|
||||||
|
|
||||||
|
Expects the LLM to output a JSON block like:
|
||||||
|
```json
|
||||||
|
{"tool_calls": [{"name": "tool_name", "arguments": {...}}, ...]}
|
||||||
|
```
|
||||||
|
|
||||||
Returns a list of tool call dicts, each with 'name' and 'arguments' keys.
|
Returns a list of tool call dicts, each with 'name' and 'arguments' keys.
|
||||||
Supports multiple tool calls in a single response.
|
|
||||||
"""
|
"""
|
||||||
tool_calls = []
|
tool_calls = []
|
||||||
|
|
||||||
def _extract_all_json_objects(text: str, start_key: str) -> list[dict]:
|
def _extract_json_object(text: str, start_key: str) -> Optional[dict]:
|
||||||
"""Extract ALL JSON objects containing start_key using brace counting."""
|
"""Extract a JSON object containing start_key using brace counting."""
|
||||||
results = []
|
idx = text.find(start_key)
|
||||||
search_start = 0
|
if idx == -1:
|
||||||
while True:
|
return None
|
||||||
idx = text.find(start_key, search_start)
|
# Walk backwards to find the opening {
|
||||||
if idx == -1:
|
depth = 0
|
||||||
break
|
obj_start = -1
|
||||||
# Walk backwards to find the opening { of this object
|
for i in range(idx, -1, -1):
|
||||||
depth = 0
|
if text[i] == '}':
|
||||||
obj_start = -1
|
depth += 1
|
||||||
for i in range(idx, -1, -1):
|
elif text[i] == '{':
|
||||||
if text[i] == '}':
|
if depth == 0:
|
||||||
depth += 1
|
obj_start = i
|
||||||
elif text[i] == '{':
|
break
|
||||||
if depth == 0:
|
depth -= 1
|
||||||
obj_start = i
|
if obj_start == -1:
|
||||||
break
|
return None
|
||||||
depth -= 1
|
# Walk forwards to find the matching closing }
|
||||||
if obj_start == -1:
|
depth = 0
|
||||||
break
|
obj_end = -1
|
||||||
# Walk forwards to find the matching closing }
|
for i in range(obj_start, len(text)):
|
||||||
depth = 0
|
if text[i] == '{':
|
||||||
obj_end = -1
|
depth += 1
|
||||||
for i in range(obj_start, len(text)):
|
elif text[i] == '}':
|
||||||
if text[i] == '{':
|
depth -= 1
|
||||||
depth += 1
|
if depth == 0:
|
||||||
elif text[i] == '}':
|
obj_end = i + 1
|
||||||
depth -= 1
|
break
|
||||||
if depth == 0:
|
if obj_end == -1:
|
||||||
obj_end = i + 1
|
return None
|
||||||
break
|
try:
|
||||||
if obj_end == -1:
|
return json.loads(text[obj_start:obj_end])
|
||||||
break
|
except json.JSONDecodeError:
|
||||||
try:
|
return None
|
||||||
obj = json.loads(text[obj_start:obj_end])
|
|
||||||
if obj and isinstance(obj, dict):
|
|
||||||
results.append(obj)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
# Move past this object to find the next one
|
|
||||||
search_start = obj_end
|
|
||||||
return results
|
|
||||||
|
|
||||||
# Pattern 1: code fence blocks containing tool_call
|
# --- Pattern 1: {"tool_calls": [...]} in a code fence block ---
|
||||||
fence_matches = re.findall(r'```\w*\s*(.*?)\s*```', content, re.DOTALL)
|
fence_matches = re.findall(r'```\w*\s*(.*?)\s*```', content, re.DOTALL)
|
||||||
for block_text in fence_matches:
|
for block_text in fence_matches:
|
||||||
if '"tool_call"' in block_text:
|
obj = _extract_json_object(block_text, '"tool_calls"')
|
||||||
objects = _extract_all_json_objects(block_text, '"tool_call"')
|
if obj and "tool_calls" in obj and isinstance(obj["tool_calls"], list):
|
||||||
for obj in objects:
|
for tc in obj["tool_calls"]:
|
||||||
if "tool_call" in obj:
|
|
||||||
tc = obj["tool_call"]
|
|
||||||
if isinstance(tc, dict) and "name" in tc:
|
|
||||||
tool_calls.append(tc)
|
|
||||||
|
|
||||||
# Pattern 2: bare JSON {"tool_call": {...}} outside code fences
|
|
||||||
# Strip code fences first to avoid double-parsing
|
|
||||||
stripped = re.sub(r'```\w*\s*.*?\s*```', '', content, flags=re.DOTALL)
|
|
||||||
if '"tool_call"' in stripped:
|
|
||||||
objects = _extract_all_json_objects(stripped, '"tool_call"')
|
|
||||||
for obj in objects:
|
|
||||||
if "tool_call" in obj:
|
|
||||||
tc = obj["tool_call"]
|
|
||||||
if isinstance(tc, dict) and "name" in tc:
|
if isinstance(tc, dict) and "name" in tc:
|
||||||
# Avoid duplicates
|
tool_calls.append(tc)
|
||||||
if not any(
|
if tool_calls:
|
||||||
existing.get("name") == tc.get("name") and
|
return tool_calls
|
||||||
existing.get("arguments") == tc.get("arguments")
|
|
||||||
for existing in tool_calls
|
|
||||||
):
|
|
||||||
tool_calls.append(tc)
|
|
||||||
|
|
||||||
# Pattern 3: [USE: tool_name args] pattern
|
# --- Pattern 2: {"tool_calls": [...]} bare JSON (outside code fences) ---
|
||||||
bracket_matches = re.findall(r'\[USE:\s*(\w+)\s*(?:args:\s*(\{.*?\}))?\s*\]', content, re.DOTALL)
|
stripped = re.sub(r'```\w*\s*.*?\s*```', '', content, flags=re.DOTALL)
|
||||||
for match in bracket_matches:
|
obj = _extract_json_object(stripped, '"tool_calls"')
|
||||||
name = match[0]
|
if obj and "tool_calls" in obj and isinstance(obj["tool_calls"], list):
|
||||||
args_str = match[1] or "{}"
|
for tc in obj["tool_calls"]:
|
||||||
try:
|
if isinstance(tc, dict) and "name" in tc:
|
||||||
args = json.loads(args_str)
|
tool_calls.append(tc)
|
||||||
except json.JSONDecodeError:
|
if tool_calls:
|
||||||
args = {}
|
return tool_calls
|
||||||
tool_calls.append({"name": name, "arguments": args})
|
|
||||||
|
# --- Pattern 3 (legacy fallback): {"tool_call": {...}} single tool ---
|
||||||
|
# Also support the old format in case the LLM ignores instructions
|
||||||
|
for block_text in fence_matches:
|
||||||
|
obj = _extract_json_object(block_text, '"tool_call"')
|
||||||
|
if obj and "tool_call" in obj and isinstance(obj["tool_call"], dict) and "name" in obj["tool_call"]:
|
||||||
|
tool_calls.append(obj["tool_call"])
|
||||||
|
if not tool_calls:
|
||||||
|
obj = _extract_json_object(stripped, '"tool_call"')
|
||||||
|
if obj and "tool_call" in obj and isinstance(obj["tool_call"], dict) and "name" in obj["tool_call"]:
|
||||||
|
tool_calls.append(obj["tool_call"])
|
||||||
|
|
||||||
return tool_calls
|
return tool_calls
|
||||||
|
|
||||||
@ -832,10 +834,11 @@ async def generate_response(
|
|||||||
temperature: float = 0.7,
|
temperature: float = 0.7,
|
||||||
max_tokens: int = 4096,
|
max_tokens: int = 4096,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Generate response using upstream LLM via OpenRouter with native tool calling.
|
"""Generate response using upstream LLM via OpenRouter.
|
||||||
|
|
||||||
Uses OpenAI-compatible `tools` parameter for reliable tool calling.
|
Uses content-based tool calling: the LLM outputs a single JSON block with
|
||||||
Falls back to content-based parsing if the model doesn't support native tools.
|
all tool calls bundled as a `tool_calls` array. This works around model
|
||||||
|
limitations on the number of native tool calls per response.
|
||||||
"""
|
"""
|
||||||
if not state.llm_client:
|
if not state.llm_client:
|
||||||
# Mock response for testing
|
# Mock response for testing
|
||||||
@ -853,32 +856,7 @@ async def generate_response(
|
|||||||
if m.content:
|
if m.content:
|
||||||
messages_dict.append({"role": m.role, "content": m.content})
|
messages_dict.append({"role": m.role, "content": m.content})
|
||||||
|
|
||||||
# Prepare native tool schemas for OpenAI API
|
# Tool calling loop (content-based approach — no `tools` param to API)
|
||||||
native_tools = None
|
|
||||||
if state.tool_manager and config.ENABLE_TOOLS:
|
|
||||||
schemas = state.tool_manager.get_all_schemas()
|
|
||||||
if schemas:
|
|
||||||
native_tools = []
|
|
||||||
for schema in schemas:
|
|
||||||
if isinstance(schema, dict):
|
|
||||||
# Ensure correct OpenAI tools format
|
|
||||||
if schema.get("type") == "function" and "function" in schema:
|
|
||||||
native_tools.append(schema)
|
|
||||||
else:
|
|
||||||
# Wrap bare function schema
|
|
||||||
native_tools.append({
|
|
||||||
"type": "function",
|
|
||||||
"function": schema,
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
log.warning(f"Skipping non-dict tool schema: {schema}")
|
|
||||||
|
|
||||||
if native_tools:
|
|
||||||
log.info(f"Passing {len(native_tools)} tools to LLM API")
|
|
||||||
else:
|
|
||||||
log.info("No native tools available, using content-only mode")
|
|
||||||
|
|
||||||
# Tool calling loop
|
|
||||||
max_iterations = config.MAX_TOOL_ITERATIONS
|
max_iterations = config.MAX_TOOL_ITERATIONS
|
||||||
iteration = 0
|
iteration = 0
|
||||||
|
|
||||||
@ -886,140 +864,78 @@ async def generate_response(
|
|||||||
iteration += 1
|
iteration += 1
|
||||||
log.info(f"LLM call iteration {iteration}")
|
log.info(f"LLM call iteration {iteration}")
|
||||||
|
|
||||||
# Build API call parameters
|
# Call LLM WITHOUT tools parameter — tool instructions are in the system prompt
|
||||||
api_params = {
|
response = await state.llm_client.chat.completions.create(
|
||||||
"model": config.UPSTREAM_MODEL,
|
model=config.UPSTREAM_MODEL,
|
||||||
"messages": messages_dict,
|
messages=messages_dict,
|
||||||
"temperature": temperature,
|
temperature=temperature,
|
||||||
"max_tokens": max_tokens,
|
max_tokens=max_tokens,
|
||||||
}
|
)
|
||||||
if native_tools:
|
|
||||||
api_params["tools"] = native_tools
|
|
||||||
api_params["tool_choice"] = "auto"
|
|
||||||
|
|
||||||
# Call LLM (with retry without tool_choice if model doesn't support it)
|
|
||||||
try:
|
|
||||||
response = await state.llm_client.chat.completions.create(**api_params)
|
|
||||||
except Exception as api_err:
|
|
||||||
err_str = str(api_err).lower()
|
|
||||||
if "tool_choice" in err_str and native_tools:
|
|
||||||
log.warning(f"Model doesn't support tool_choice, retrying without it: {api_err}")
|
|
||||||
del api_params["tool_choice"]
|
|
||||||
response = await state.llm_client.chat.completions.create(**api_params)
|
|
||||||
else:
|
|
||||||
raise
|
|
||||||
|
|
||||||
if not response.choices:
|
if not response.choices:
|
||||||
log.warning("No choices in response")
|
log.warning("No choices in response")
|
||||||
return "I apologize, but I couldn't generate a response."
|
return "I apologize, but I couldn't generate a response."
|
||||||
|
|
||||||
choice = response.choices[0]
|
content = response.choices[0].message.content or ""
|
||||||
message = choice.message
|
log.info(f"LLM response: content_len={len(content)}")
|
||||||
content = message.content or ""
|
|
||||||
finish_reason = choice.finish_reason or "stop"
|
|
||||||
|
|
||||||
log.info(f"LLM response: content_len={len(content)}, finish_reason={finish_reason}")
|
# --- Parse tool calls from content ---
|
||||||
|
tool_calls = _parse_tool_calls(content)
|
||||||
|
|
||||||
# --- Handle native tool calls (preferred path) ---
|
if tool_calls:
|
||||||
native_tool_calls = getattr(message, 'tool_calls', None)
|
log.info(f"Parsed {len(tool_calls)} tool calls from content")
|
||||||
|
|
||||||
if native_tool_calls:
|
# Execute ALL tools concurrently
|
||||||
log.info(f"Native tool calls detected: {len(native_tool_calls)}")
|
if state.tool_manager:
|
||||||
|
import asyncio as _asyncio
|
||||||
|
|
||||||
# Build assistant message with tool_calls for conversation history
|
async def _run_tool(tc):
|
||||||
assistant_msg = {
|
name = tc.get("name")
|
||||||
"role": "assistant",
|
args = tc.get("arguments", {})
|
||||||
"content": content if content else None,
|
if not isinstance(args, dict):
|
||||||
"tool_calls": [
|
try:
|
||||||
{
|
args = json.loads(args)
|
||||||
"id": tc.id,
|
except (json.JSONDecodeError, TypeError):
|
||||||
"type": "function",
|
args = {}
|
||||||
"function": {
|
result = await _asyncio.to_thread(
|
||||||
"name": tc.function.name,
|
state.tool_manager.execute_tool, name, args
|
||||||
"arguments": tc.function.arguments or "{}",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for tc in native_tool_calls
|
|
||||||
],
|
|
||||||
}
|
|
||||||
messages_dict.append(assistant_msg)
|
|
||||||
|
|
||||||
# Execute each tool and add result messages
|
|
||||||
for tc in native_tool_calls:
|
|
||||||
tool_name = tc.function.name
|
|
||||||
try:
|
|
||||||
tool_args = json.loads(tc.function.arguments or "{}")
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
log.warning(f"Failed to parse tool arguments for {tool_name}: {tc.function.arguments}")
|
|
||||||
tool_args = {}
|
|
||||||
|
|
||||||
log.info(f"Executing native tool: {tool_name} with args: {tool_args}")
|
|
||||||
|
|
||||||
if state.tool_manager:
|
|
||||||
result = await asyncio.to_thread(
|
|
||||||
state.tool_manager.execute_tool, tool_name, tool_args
|
|
||||||
)
|
)
|
||||||
else:
|
return name, result
|
||||||
result = {"success": False, "error": "No tool manager available"}
|
|
||||||
|
|
||||||
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
|
results = await _asyncio.gather(*[_run_tool(tc) for tc in tool_calls])
|
||||||
|
|
||||||
# Add tool result using proper 'tool' role
|
# Build a single consolidated results block
|
||||||
messages_dict.append({
|
results_text = ""
|
||||||
"role": "tool",
|
for name, result in results:
|
||||||
"tool_call_id": tc.id,
|
log.info(f"Tool {name} result: success={result.get('success', False)}")
|
||||||
"content": json.dumps(result),
|
results_text += f"\n### Tool: {name}\n{json.dumps(result, indent=2)}\n"
|
||||||
})
|
|
||||||
|
|
||||||
continue
|
# Append assistant's tool call message to conversation
|
||||||
|
messages_dict.append({"role": "assistant", "content": content})
|
||||||
|
|
||||||
# --- Fallback: parse tool calls from content (for models without native tool support) ---
|
# Feed ALL results back in one user message
|
||||||
content_tool_calls = _parse_tool_calls(content)
|
|
||||||
|
|
||||||
if content_tool_calls:
|
|
||||||
log.info(f"Content-based tool calls detected: {len(content_tool_calls)}")
|
|
||||||
|
|
||||||
# Add the assistant's raw response to conversation
|
|
||||||
messages_dict.append({"role": "assistant", "content": content})
|
|
||||||
|
|
||||||
for tool_call in content_tool_calls:
|
|
||||||
tool_name = tool_call.get("name")
|
|
||||||
tool_args = tool_call.get("arguments", {})
|
|
||||||
|
|
||||||
if not isinstance(tool_args, dict):
|
|
||||||
try:
|
|
||||||
tool_args = json.loads(tool_args)
|
|
||||||
except (json.JSONDecodeError, TypeError):
|
|
||||||
tool_args = {}
|
|
||||||
|
|
||||||
log.info(f"Executing content-based tool: {tool_name}")
|
|
||||||
|
|
||||||
if state.tool_manager:
|
|
||||||
result = await asyncio.to_thread(
|
|
||||||
state.tool_manager.execute_tool, tool_name, tool_args
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
result = {"success": False, "error": "No tool manager available"}
|
|
||||||
|
|
||||||
log.info(f"Tool {tool_name} result: success={result.get('success', False)}")
|
|
||||||
|
|
||||||
# Feed result back as a user message
|
|
||||||
messages_dict.append({
|
messages_dict.append({
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"--- TOOL RESULT ---\nTool: {tool_name}\nResult: {json.dumps(result, indent=2)}\n\nNow provide a helpful response based on this data.",
|
"content": (
|
||||||
|
f"--- ALL TOOL RESULTS ---\n"
|
||||||
|
f"Executed {len(tool_calls)} tool(s). Results:\n{results_text}\n"
|
||||||
|
f"---\n\n"
|
||||||
|
f"Now provide a helpful response to the original question using ALL the data above."
|
||||||
|
),
|
||||||
})
|
})
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
else:
|
||||||
|
log.warning("Tool call detected but tool_manager is None")
|
||||||
|
|
||||||
# --- No tool calls - return the final response ---
|
# --- No tool calls — return the final response ---
|
||||||
# Light cleanup: only strip code-fence-wrapped tool_call blocks
|
|
||||||
cleaned_content = _clean_tool_syntax(content)
|
cleaned_content = _clean_tool_syntax(content)
|
||||||
log.info(f"Returning final response (len={len(cleaned_content)}, cleaned={len(cleaned_content) != len(content)})")
|
log.info(f"Returning final response (len={len(cleaned_content)})")
|
||||||
return cleaned_content or "I apologize, but I couldn't generate a response."
|
return cleaned_content or "I apologize, but I couldn't generate a response."
|
||||||
|
|
||||||
# Max iterations reached
|
# Max iterations reached
|
||||||
log.warning(f"Max iterations ({max_iterations}) reached")
|
log.warning(f"Max iterations ({max_iterations}) reached")
|
||||||
return "I reached the maximum number of tool calls. Please try a more specific question."
|
return "I reached the maximum number of tool call rounds. Please try a more specific question."
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.error(f"OpenRouter LLM call failed: {e}")
|
log.error(f"OpenRouter LLM call failed: {e}")
|
||||||
@ -1029,16 +945,15 @@ async def generate_response(
|
|||||||
|
|
||||||
|
|
||||||
def _clean_tool_syntax(content: str) -> str:
|
def _clean_tool_syntax(content: str) -> str:
|
||||||
"""Remove tool call syntax from response if partially included.
|
"""Remove tool call JSON blocks from response text.
|
||||||
|
|
||||||
Only strips code-fence-wrapped blocks containing tool_call.
|
Strips code-fence-wrapped blocks containing "tool_calls" or "tool_call".
|
||||||
Does NOT strip bare JSON to avoid accidentally removing valid content.
|
Does NOT strip bare JSON to avoid removing valid content.
|
||||||
"""
|
"""
|
||||||
# Remove ```json ... ``` blocks containing tool_call
|
|
||||||
def remove_code_block(m):
|
def remove_code_block(m):
|
||||||
block = m.group(0)
|
block = m.group(0)
|
||||||
inner = m.group(1)
|
inner = m.group(1)
|
||||||
if '"tool_call"' in inner:
|
if '"tool_calls"' in inner or '"tool_call"' in inner:
|
||||||
return ''
|
return ''
|
||||||
return block
|
return block
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user