- server.py: FastAPI + WebSocket server wrapping Brain, TTS, Actions
- WS /ws/chat: streaming chat with token-by-token delivery
- GET /api/audio/{filename}: serve generated TTS audio
- GET /api/health: server status check
- Serves static web UI from web/ directory
- web/: self-contained HTML/CSS/JS frontend
- Responsive chat interface with message bubbles
- WebSocket client for real-time streaming
- Voice input via Web Speech API (mic button)
- TTS audio playback (auto + manual replay)
- Conversation sidebar with history
- Settings panel (voice, TTS, sidebar toggles)
- Dark mode support via prefers-color-scheme
- Updated requirements.txt with fastapi, uvicorn, websockets
- Updated .env.example with SERVER_HOST/PORT config
268 lines
9.5 KiB
Python
268 lines
9.5 KiB
Python
"""
|
|
server.py — Echo Voice Assistant Web Server (FastAPI + WebSocket)
|
|
|
|
Starts a web server on port 8001 that serves:
|
|
- Web UI (static files from web/)
|
|
- WebSocket endpoint for streaming chat
|
|
- REST API for health checks and generated TTS audio
|
|
|
|
Usage:
|
|
python server.py
|
|
# Then open http://localhost:8001 in your browser
|
|
|
|
Environment Variables (see .env.example):
|
|
OPENROUTER_API_KEY — required for LLM responses
|
|
SERVER_PORT — port to run on (default: 8001)
|
|
SERVER_HOST — host to bind to (default: 0.0.0.0)
|
|
|
|
Dependencies:
|
|
pip install fastapi uvicorn python-multipart websockets
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
from dotenv import load_dotenv
|
|
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
|
|
from fastapi.middleware.cors import CORSMiddleware
|
|
from fastapi.responses import FileResponse, HTMLResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
|
|
from brain import Brain
|
|
from tts import TTSEngine
|
|
from actions import execute as execute_action
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Logging
|
|
# ---------------------------------------------------------------------------
|
|
# One shared line layout for every logger in the process:
# time │ logger name (padded to 18) │ level (padded to 7) │ message.
_LOG_FORMAT = "%(asctime)s │ %(name)-18s │ %(levelname)-7s │ %(message)s"

logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT, datefmt="%H:%M:%S")

# Module-wide logger used by the route handlers and the entry point.
logger = logging.getLogger("echo.server")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Config
|
|
# ---------------------------------------------------------------------------
|
|
# Directory containing this file; both the .env file and the web/ assets
# are looked up relative to it.
_PROJECT_ROOT = Path(__file__).parent

# Populate os.environ from the project's .env file before reading settings.
load_dotenv(_PROJECT_ROOT / ".env")

# Interface and port the server binds to (overridable through the environment).
SERVER_HOST = os.getenv("SERVER_HOST", "0.0.0.0")
SERVER_PORT = int(os.getenv("SERVER_PORT", "8001"))

# Location of the static web UI files.
WEB_DIR = _PROJECT_ROOT / "web"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# FastAPI app
|
|
# ---------------------------------------------------------------------------
|
|
# The ASGI application; all routes below are registered on this object.
app = FastAPI(title="Echo Voice Assistant", version="1.0.0")

# CORS: any origin may call the HTTP API.
#
# Credentials are deliberately NOT allowed. A wildcard origin combined with
# allow_credentials=True is rejected by the CORS specification and is
# documented as unsupported by FastAPI/Starlette; it would let any website
# issue credentialed requests on behalf of a visitor. Nothing in this module
# reads cookies or Authorization headers, so disabling credentials does not
# remove any functionality.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Initialize engines
|
|
# ---------------------------------------------------------------------------
|
|
# LLM engine. The API key comes from OPENROUTER_API_KEY (None when unset);
# the model can be overridden with OPENROUTER_MODEL.
_llm_model = os.getenv("OPENROUTER_MODEL", "qwen/qwen-3-235b-a22b")
brain = Brain(api_key=os.getenv("OPENROUTER_API_KEY"), model=_llm_model)

# Text-to-speech engine. Every setting has an environment override.
_tts_settings = {
    "model_name": os.getenv(
        "QWEN_TTS_MODEL", "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
    ),
    "voice_sample": os.getenv("QWEN_TTS_VOICE", "voices/echo_voice.wav"),
    "instruction": os.getenv(
        "QWEN_TTS_INSTRUCT",
        "Speak clearly with a warm, friendly tone. Be natural and conversational.",
    ),
}
tts = TTSEngine(**_tts_settings)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Per-session state
|
|
# ---------------------------------------------------------------------------
|
|
# Registry of live sessions, keyed by the short id assigned per WebSocket.
sessions: dict[str, dict] = {}


def get_session(ws_id: str) -> dict:
    """Return the session record for *ws_id*, creating it on first use.

    A new record has the shape ``{"id": ws_id, "history": []}`` and is stored
    in the module-level ``sessions`` registry, so repeated calls with the same
    id return the same dict object.
    """
    try:
        return sessions[ws_id]
    except KeyError:
        fresh = {"id": ws_id, "history": []}
        sessions[ws_id] = fresh
        return fresh
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Routes — Web UI
|
|
# ---------------------------------------------------------------------------
|
|
@app.get("/")
async def serve_index():
    """Serve the web UI entry page, or a placeholder when it is missing."""
    index_page = WEB_DIR / "index.html"
    if index_page.exists():
        return FileResponse(index_page)
    return HTMLResponse("<h1>Echo Web UI not found — place files in web/ directory</h1>")


def _contained_file(base_dir: Path, relative: str):
    """Return the regular file ``base_dir / relative``, or ``None``.

    ``None`` is returned when the joined path does not name an existing
    regular file, or when it lies outside ``base_dir``. The second case
    covers both ``..`` segments and absolute paths: ``Path("/a") / "/etc/x"``
    silently discards the left-hand side, so an unchecked join would let a
    request such as ``GET //etc/passwd`` read arbitrary files.

    The check is lexical (``os.path.abspath``), so symlinks placed inside
    ``base_dir`` keep working as before.
    """
    root = Path(os.path.abspath(base_dir))
    candidate = Path(os.path.abspath(root / relative))
    if root in candidate.parents and candidate.is_file():
        return candidate
    return None


# ---------------------------------------------------------------------------
# Routes — API
#
# These MUST be registered before the static catch-all below: routes are
# matched in registration order, and "/{path:path}" matches every GET path,
# including "/api/...".
# ---------------------------------------------------------------------------
@app.get("/api/health")
async def health():
    """Report server status, voice-sample presence and the configured model."""
    voice_ok = Path(os.environ.get("QWEN_TTS_VOICE", "voices/echo_voice.wav")).exists()
    return {
        "status": "ok",
        "voice_sample": "loaded" if voice_ok else "missing",
        "model": os.environ.get("OPENROUTER_MODEL", "qwen/qwen-3-235b-a22b"),
    }


@app.get("/api/audio/{filename}")
async def get_audio(filename: str):
    """Serve a generated audio file from the ``audio_output`` directory.

    Responds with 404 when the name does not resolve to a regular file
    inside ``audio_output`` (missing file, directory, or traversal attempt).
    """
    audio_path = _contained_file(Path("audio_output"), filename)
    if audio_path is None:
        return HTMLResponse("Not found", status_code=404)
    return FileResponse(audio_path, media_type="audio/wav")


# ---------------------------------------------------------------------------
# Routes — static files (catch-all, keep last)
# ---------------------------------------------------------------------------
@app.get("/{path:path}")
async def serve_static(path: str):
    """Serve a static asset from ``web/``; 404 for anything else.

    Only regular files located inside ``WEB_DIR`` are served.
    """
    file_path = _contained_file(WEB_DIR, path)
    if file_path is None:
        return HTMLResponse("Not found", status_code=404)
    return FileResponse(file_path)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# WebSocket — Streaming Chat
|
|
# ---------------------------------------------------------------------------
|
|
def _parse_client_message(raw: str):
    """Decode one client text frame into ``(msg_type, payload)``.

    Returns ``None`` when the frame is not valid JSON or is not a JSON
    object. A missing ``type`` defaults to ``"chat"``; a missing or
    non-object ``payload`` is normalised to an empty dict so callers can
    always use ``payload.get(...)``.
    """
    try:
        data = json.loads(raw)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    if not isinstance(data, dict):
        return None
    payload = data.get("payload", {})
    if not isinstance(payload, dict):
        payload = {}
    return data.get("type", "chat"), payload


async def _finish_reply(ws: WebSocket, session: dict, spoken: str,
                        full_text: str, pending_command) -> None:
    """Handle a ``done`` event: final text, history, action, TTS, ``ready``."""
    # Send completion with the final text.
    await ws.send_json({"type": "done", "text": spoken, "full_text": full_text})

    # Record the reply and keep only the 20 most recent messages.
    session["history"].append({"role": "assistant", "content": spoken})
    if len(session["history"]) > 20:
        session["history"] = session["history"][-20:]

    # Execute any local command the brain requested during this turn.
    if pending_command:
        action_name = pending_command.get("action", "")
        action_params = pending_command.get("params", {})
        logger.info("Executing action: %s %s", action_name, action_params)
        # NOTE(review): execute_action is called synchronously; if it can
        # block, it will stall the event loop — confirm against actions.py.
        action_result = execute_action(action_name, action_params)
        await ws.send_json({
            "type": "action",
            "action": action_name,
            "result": action_result,
        })

    # TTS is best-effort: a failure is logged and the turn still completes.
    try:
        wav_path = await tts.generate(spoken)
        if wav_path:
            await ws.send_json({
                "type": "audio",
                "url": f"/api/audio/{wav_path.name}",
            })
    except WebSocketDisconnect:
        # A vanished client is not a TTS failure; let the caller clean up.
        raise
    except Exception as exc:
        logger.warning("TTS generation skipped: %s", exc)

    await ws.send_json({"type": "ready"})


async def _stream_chat_reply(ws: WebSocket, session: dict, message: str) -> None:
    """Run one chat turn: stream brain events for *message* to the client.

    Handles the three event types emitted by ``brain.think`` that this
    server understands: ``token`` (forwarded immediately), ``command``
    (remembered until the turn completes) and ``done`` (see
    ``_finish_reply``). Any processing error is reported to the client as an
    ``error`` frame followed by ``ready`` so the UI can accept new input.
    """
    session["history"].append({"role": "user", "content": message})

    full_text = ""
    pending_command = None

    try:
        async for event in brain.think(message):
            kind = event["type"]
            if kind == "token":
                full_text += event["text"]
                await ws.send_json({"type": "token", "text": event["text"]})
            elif kind == "command":
                pending_command = event["command"]
            elif kind == "done":
                await _finish_reply(
                    ws, session, event["text"], full_text, pending_command
                )
    except WebSocketDisconnect:
        # Do not log a disconnect as a chat error or try to write to the
        # closed socket; the connection handler deals with it.
        raise
    except Exception as exc:
        logger.exception("Error processing chat")
        await ws.send_json({"type": "error", "text": f"Error: {exc}"})
        await ws.send_json({"type": "ready"})


@app.websocket("/ws/chat")
async def websocket_chat(ws: WebSocket):
    """Streaming chat endpoint.

    Each connection gets its own short session id and history. The client
    sends JSON objects of the form ``{"type": ..., "payload": {...}}``:

    * ``chat``  — ``payload["message"]`` is answered with ``token`` frames,
      then ``done``, optional ``action`` / ``audio`` frames and ``ready``.
    * ``clear`` — resets the session history and replies with ``cleared``.

    Malformed frames are answered with an ``error`` frame and the connection
    stays open. Other message types are ignored. The session is removed when
    the connection ends for any reason.
    """
    await ws.accept()
    ws_id = str(uuid.uuid4())[:8]
    session = get_session(ws_id)
    logger.info("Client connected: session=%s", ws_id)

    try:
        while True:
            parsed = _parse_client_message(await ws.receive_text())
            if parsed is None:
                await ws.send_json(
                    {"type": "error", "text": "Invalid message format"}
                )
                continue
            msg_type, payload = parsed

            if msg_type == "chat":
                raw_message = payload.get("message", "")
                # A non-string message is treated like an empty one.
                message = raw_message.strip() if isinstance(raw_message, str) else ""
                if not message:
                    await ws.send_json({"type": "error", "text": "Empty message"})
                    continue
                await _stream_chat_reply(ws, session, message)

            elif msg_type == "clear":
                session["history"] = []
                await ws.send_json({"type": "cleared"})

    except WebSocketDisconnect:
        logger.info("Client disconnected: session=%s", ws_id)
    except Exception:
        logger.exception("WebSocket error for session=%s", ws_id)
    finally:
        # Always drop per-connection state so the registry cannot grow.
        sessions.pop(ws_id, None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Entry point
|
|
# ---------------------------------------------------------------------------
|
|
if __name__ == "__main__":
    # Imported here so that importing this module (e.g. by an external ASGI
    # runner) does not require uvicorn.
    import uvicorn

    # Startup banner with the address the server is about to bind to.
    banner_rule = "=" * 50
    logger.info(banner_rule)
    logger.info(" ECHO VOICE ASSISTANT — Web Server")
    logger.info(" http://%s:%d", SERVER_HOST, SERVER_PORT)
    logger.info(banner_rule)

    # uvicorn's own logging is limited to warnings and above.
    uvicorn.run(app, host=SERVER_HOST, port=SERVER_PORT, log_level="warning")
|