"""
brain.py — OpenRouter LLM Client (Streaming)

Responsibilities:
  1. Send transcribed text to OpenRouter with a system prompt that
     instructs the model to produce:
       a) A concise verbal response (≤ 2 sentences for voice).
       b) An optional JSON command block for local execution.
  2. Stream tokens back so the TTS engine can start early (Phase 5).
  3. Parse any JSON command block and return it alongside the spoken text.

Environment Variables:
  OPENROUTER_API_KEY   — your OpenRouter API key
  OPENROUTER_MODEL     — model identifier (default: qwen/qwen-3-235b-a22b)
  OPENROUTER_BASE_URL  — (optional) custom base URL

Dependencies:
  pip install openai
"""

import json
import logging
import os
import re
from typing import AsyncIterator

from openai import AsyncOpenAI

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

DEFAULT_MODEL = "qwen/qwen-3-235b-a22b"
DEFAULT_BASE_URL = "https://openrouter.ai/api/v1"

SYSTEM_PROMPT = """\
You are Echo, a concise, helpful voice assistant. Follow these rules strictly:

1. **Verbal response**: Reply in ≤ 2 short sentences so it sounds natural when
   spoken aloud. Be direct and conversational.

2. **Local commands** (optional): If the user's request can be fulfilled by a
   local OS action (e.g. opening an app, setting a timer, checking the time,
   creating a reminder), include a single JSON block at the very end of your
   response using this exact format:

   ```json
   {"action": "", "params": {"key": "value"}}
   ```

   Supported actions: open_app, set_timer, get_time, get_date, get_weather,
   create_reminder, control_volume, shutdown, search_web, calculate.

3. Do NOT include the JSON block in your spoken text. The spoken text is
   everything BEFORE the JSON block.

4. If no local action is needed, just respond normally without any JSON.

5. Never use markdown formatting, bullet points, or headers in the spoken text.
"""

# Spoken (and streamed) when the request to the LLM fails.
_FALLBACK_REPLY = "Sorry, I had trouble thinking about that."

# Qwen3 reasoning block. An unterminated block (e.g. the reply was cut off by
# max_tokens) is treated as running to the end of the text.
_THINK_BLOCK_RE = re.compile(r"<think[^>]*>.*?(?:</think>|\Z)", re.DOTALL)

# Complete fenced JSON block; group 1 is the payload between the fences.
_JSON_FENCE_RE = re.compile(r"```json(.*?)```", re.DOTALL)

# Fenced JSON block for removal from spoken text; also drops a truncated block
# that never received its closing fence.
_JSON_FENCE_STRIP_RE = re.compile(r"```json.*?(?:```|\Z)", re.DOTALL)

# Bare JSON object containing an "action" key. One level of nested braces is
# allowed so that a nested "params" object is removed together with its parent.
_BARE_COMMAND_RE = re.compile(
    r'\{(?:[^{}]|\{[^{}]*\})*"action"(?:[^{}]|\{[^{}]*\})*\}'
)


class Brain:
    """Async client for OpenRouter LLM with streaming support."""

    def __init__(
        self,
        api_key: str | None = None,
        model: str | None = None,
        base_url: str | None = None,
    ):
        """
        Create the client and an empty conversation history.

        Args:
            api_key: OpenRouter API key. Falls back to OPENROUTER_API_KEY.
            model: Model identifier. Falls back to OPENROUTER_MODEL, then
                DEFAULT_MODEL.
            base_url: API base URL. Falls back to OPENROUTER_BASE_URL, then
                DEFAULT_BASE_URL.
        """
        resolved_key = api_key or os.environ.get("OPENROUTER_API_KEY", "")
        if not resolved_key:
            logger.warning(
                "OPENROUTER_API_KEY is not set; requests are likely to be rejected"
            )

        self.model = model or os.environ.get("OPENROUTER_MODEL") or DEFAULT_MODEL
        self.client = AsyncOpenAI(
            api_key=resolved_key,
            base_url=(
                base_url
                or os.environ.get("OPENROUTER_BASE_URL")
                or DEFAULT_BASE_URL
            ),
        )

        # Conversation history (rolling window)
        self._history: list[dict[str, str]] = []
        self._max_history = 20  # keep last 20 messages for context

    def _trim_history(self) -> None:
        """Drop the oldest messages so at most ``_max_history`` remain."""
        overflow = len(self._history) - self._max_history
        if overflow > 0:
            del self._history[:overflow]

    async def think(
        self,
        user_text: str,
    ) -> AsyncIterator[dict]:
        """
        Stream the LLM response token-by-token.

        "token" events carry the raw model output as it arrives, so they may
        include thinking tags or the JSON command block; the "done" event
        carries the cleaned spoken text.

        Args:
            user_text: The transcribed user utterance.

        Yields:
            dict with keys:
              - "type": "token" | "command" | "done"
              - "text": the token string (for "token" type) or the cleaned
                spoken text (for "done" type)
              - "command": parsed JSON dict (for "command" type)
        """
        self._history.append({"role": "user", "content": user_text})
        self._trim_history()

        messages = [{"role": "system", "content": SYSTEM_PROMPT}, *self._history]

        logger.info("Sending to OpenRouter (%s): %s", self.model, user_text[:80])

        parts: list[str] = []
        failed = False

        try:
            stream = await self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                stream=True,
                temperature=0.7,
                max_tokens=300,
            )

            async for chunk in stream:
                # Some chunks carry no choices (e.g. a usage-only chunk);
                # indexing them would raise and abort a healthy stream.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content:
                    parts.append(content)
                    yield {"type": "token", "text": content}

        except Exception:
            # Top-level boundary: the caller always gets something to say.
            logger.exception("OpenRouter request failed")
            failed = True
            yield {"type": "token", "text": _FALLBACK_REPLY}

        raw_response = "".join(parts)

        # Parse any JSON command block from the full response
        command = self._extract_command(raw_response)
        if command:
            logger.info("Parsed command: %s", command)
            yield {"type": "command", "command": command}

        # Clean spoken text (strip JSON block and thinking tags)
        spoken_text = self._clean_spoken_text(raw_response)

        if failed:
            # Keep the final text and the history in line with what was
            # actually streamed to the caller.
            spoken_text = f"{spoken_text} {_FALLBACK_REPLY}".strip()
            raw_response = f"{raw_response} {_FALLBACK_REPLY}".strip()

        # Never store an empty assistant turn in the history.
        if raw_response:
            self._history.append({"role": "assistant", "content": raw_response})
            self._trim_history()

        yield {"type": "done", "text": spoken_text}

    def _extract_command(self, text: str) -> dict | None:
        """
        Extract the JSON command block from the LLM response.

        A fenced ```json block is preferred; otherwise the text is searched
        for a bare JSON object that runs to the end of the response.

        Args:
            text: The full raw model response.

        Returns:
            The parsed command dict, or None when no JSON object is found.
        """
        # Ignore anything inside the model's reasoning block.
        text = _THINK_BLOCK_RE.sub("", text)

        fenced = _JSON_FENCE_RE.search(text)
        if fenced:
            try:
                parsed = json.loads(fenced.group(1).strip())
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed

        # Bare JSON at the end: walk the "{" positions from last to first.
        # An inner "{" (e.g. the nested "params" object) fails with trailing
        # data, so keep widening until the enclosing object parses.
        tail = text.rstrip()
        pos = tail.rfind("{")
        while pos != -1:
            try:
                candidate = json.loads(tail[pos:])
            except json.JSONDecodeError:
                candidate = None
            if isinstance(candidate, dict):
                return candidate
            pos = tail.rfind("{", 0, pos)

        return None

    @staticmethod
    def _clean_spoken_text(text: str) -> str:
        """
        Remove JSON blocks and Qwen thinking tags from spoken text.

        Args:
            text: The full raw model response.

        Returns:
            The text with thinking blocks, fenced JSON blocks and bare
            "action" objects removed, then stripped of surrounding
            whitespace and periods.
        """
        # Remove Qwen3 <think> blocks
        cleaned = _THINK_BLOCK_RE.sub("", text)
        # Remove JSON code blocks
        cleaned = _JSON_FENCE_STRIP_RE.sub("", cleaned)
        # Remove any bare JSON command object
        cleaned = _BARE_COMMAND_RE.sub("", cleaned)
        # Clean up whitespace
        return cleaned.strip().strip(".")