diff --git a/.env.example b/.env.example index 3c15871..0e76044 100644 --- a/.env.example +++ b/.env.example @@ -1,19 +1,21 @@ # DocRAG Configuration # Copy this file to .env and fill in your values -# Server Configuration +# Server settings HOST=0.0.0.0 PORT=8000 DEBUG=false -# Model Configuration -MODEL_NAME=DocRAG-GLM-4.7 -UPSTREAM_MODEL=glm-4.7 +# Model settings +MODEL_NAME=DocRAG +UPSTREAM_MODEL=openrouter/free -# API Keys -ZAI_API_KEY=your-zai-api-key-here +# OpenRouter API settings (required for LLM responses) +# Get your API key from https://openrouter.ai/keys +OPENROUTER_API_KEY=your_openrouter_api_key_here +OPENROUTER_BASE_URL=https://openrouter.ai/api/v1 -# RAG Configuration +# RAG settings EMBEDDING_MODEL=text-embedding-3-small VECTOR_STORE_PATH=./data/vectors DOCUMENTS_PATH=./data/documents @@ -21,6 +23,6 @@ CHUNK_SIZE=1000 CHUNK_OVERLAP=200 TOP_K_RESULTS=5 -# Tool Configuration +# Tool settings ENABLE_TOOLS=true MAX_TOOL_ITERATIONS=3 diff --git a/main.py b/main.py index 01e64d1..c79c877 100644 --- a/main.py +++ b/main.py @@ -26,6 +26,10 @@ import uuid from contextlib import asynccontextmanager from typing import Any, AsyncIterator, Optional +# Load environment variables from .env file +from dotenv import load_dotenv +load_dotenv() + # Configure logging logging.basicConfig( level=logging.INFO, @@ -47,12 +51,8 @@ from rag.document_processor import DocumentProcessor # Import tools from tools import ToolManager, get_tool_manager -# Import SDK for GLM-4.7-Flash -try: - from zai import ZaiClient as ZAI -except ImportError: - ZAI = None - log.warning("z-ai-web-dev-sdk not installed. 
Install with: pip install z-ai-web-dev-sdk") +# Import OpenAI client for OpenRouter +from openai import AsyncOpenAI # ============================================================================= @@ -68,11 +68,12 @@ class Config: DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true" # Model settings - MODEL_NAME: str = os.getenv("MODEL_NAME", "DocRAG-GLM-4.7") - UPSTREAM_MODEL: str = os.getenv("UPSTREAM_MODEL", "glm-4.7") + MODEL_NAME: str = os.getenv("MODEL_NAME", "DocRAG") + UPSTREAM_MODEL: str = os.getenv("UPSTREAM_MODEL", "openrouter/free") - # API Key for upstream LLM - ZAI_API_KEY: str = os.getenv("ZAI_API_KEY", "") + # OpenRouter API settings + OPENROUTER_API_KEY: str = os.getenv("OPENROUTER_API_KEY", "") + OPENROUTER_BASE_URL: str = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1") # RAG settings EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small") @@ -105,7 +106,7 @@ class ChatMessage(BaseModel): class ChatCompletionRequest(BaseModel): """OpenAI chat completion request format.""" - model: str = "DocRAG-GLM-4.7" + model: str = "DocRAG" messages: list[ChatMessage] temperature: Optional[float] = 0.7 top_p: Optional[float] = 1.0 @@ -166,7 +167,7 @@ class AppState: """Global application state.""" rag_system: Optional[RAGSystem] = None tool_manager: Optional[ToolManager] = None - zai_client: Any = None + llm_client: Optional[AsyncOpenAI] = None startup_time: float = time.time() @@ -206,18 +207,21 @@ async def lifespan(app: FastAPI): log.warning(f"Tool manager initialization failed: {e}") state.tool_manager = None - # Initialize ZAI client for upstream LLM + # Initialize OpenRouter client for upstream LLM try: - if config.ZAI_API_KEY and ZAI is not None: - log.info("Initializing ZAI client...") - state.zai_client = ZAI(api_key=config.ZAI_API_KEY) - log.info("ZAI client initialized successfully") + if config.OPENROUTER_API_KEY: + log.info("Initializing OpenRouter client...") + state.llm_client = AsyncOpenAI( + 
api_key=config.OPENROUTER_API_KEY, + base_url=config.OPENROUTER_BASE_URL, + ) + log.info(f"OpenRouter client initialized successfully (model: {config.UPSTREAM_MODEL})") else: - log.warning("No ZAI_API_KEY provided or SDK not installed - using mock responses") - state.zai_client = None + log.warning("No OPENROUTER_API_KEY provided - using mock responses") + state.llm_client = None except Exception as e: - log.error(f"Failed to initialize ZAI client: {e}") - state.zai_client = None + log.error(f"Failed to initialize OpenRouter client: {e}") + state.llm_client = None log.info(f"DocRAG server started on {config.HOST}:{config.PORT}") @@ -236,7 +240,7 @@ async def lifespan(app: FastAPI): app = FastAPI( title="DocRAG API", - description="OpenAI-compatible RAG server powered by GLM-4.7-Flash", + description="OpenAI-compatible RAG server powered by OpenRouter", version="1.0.0", lifespan=lifespan, ) @@ -262,7 +266,7 @@ async def list_models(): return ModelList( data=[ ModelInfo(id=config.MODEL_NAME, owned_by="docrag"), - ModelInfo(id="DocRAG-GLM-4.7", owned_by="docrag"), + ModelInfo(id="DocRAG", owned_by="docrag"), ] ) @@ -271,7 +275,7 @@ async def list_models(): @app.get("/models/{model_id}") async def get_model(model_id: str): """Get model information (OpenAI-compatible).""" - if model_id not in [config.MODEL_NAME, "DocRAG-GLM-4.7"]: + if model_id not in [config.MODEL_NAME, "DocRAG"]: raise HTTPException(status_code=404, detail="Model not found") return ModelInfo(id=model_id, owned_by="docrag") @@ -496,26 +500,18 @@ async def stream_chat_completion( created = int(time.time()) try: - if state.zai_client: - # Use actual GLM-4.7-Flash - response = state.zai_client.chat.completions.create( + if state.llm_client: + # Use OpenRouter with streaming + stream = await state.llm_client.chat.completions.create( model=config.UPSTREAM_MODEL, messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content], temperature=request.temperature or 0.7, 
max_tokens=request.max_tokens or 4096, stream=True, - thinking={"type": "enabled"}, ) - for chunk in response: - # Handle reasoning content (thinking) - if hasattr(chunk.choices[0].delta, 'reasoning_content') and chunk.choices[0].delta.reasoning_content: - # Don't expose thinking to user - this is internal RAG processing - log.debug(f"Thinking: {chunk.choices[0].delta.reasoning_content[:100]}...") - continue - - # Stream actual content - if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content: + async for chunk in stream: + if chunk.choices and chunk.choices[0].delta.content: content = chunk.choices[0].delta.content yield f"data: {json.dumps({ 'id': request_id, @@ -551,7 +547,7 @@ async def stream_chat_completion( mock_response += f"Processed {download_info.get('pages')} pages into {download_info.get('chunks')} chunks.\n\n" if context: mock_response += f"Based on my knowledge base, here's what I found:\n\n{context[:1000]}...\n\n" - mock_response += "\n\n[Demo mode - configure ZAI_API_KEY for full LLM responses]" + mock_response += "\n\n[Demo mode - configure OPENROUTER_API_KEY for full LLM responses]" for char in mock_response: yield f"data: {json.dumps({ @@ -629,31 +625,25 @@ async def generate_response( temperature: float = 0.7, max_tokens: int = 4096, ) -> str: - """Generate response using upstream LLM.""" - if state.zai_client: + """Generate response using upstream LLM via OpenRouter.""" + if state.llm_client: try: - response = state.zai_client.chat.completions.create( + response = await state.llm_client.chat.completions.create( model=config.UPSTREAM_MODEL, messages=[{"role": m.role, "content": m.content} for m in messages if m.content], temperature=temperature, max_tokens=max_tokens, - stream=False, - thinking={"type": "enabled"}, ) # Extract content from response - content = "" - for chunk in response: - if hasattr(chunk.choices[0], 'message') and chunk.choices[0].message: - content = chunk.choices[0].message.content or "" - break - 
if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content: - content += chunk.choices[0].delta.content - - return content or "I apologize, but I couldn't generate a response." + if response.choices: + message_content = response.choices[0].message.content + return message_content or "I apologize, but I couldn't generate a response." + + return "I apologize, but I couldn't generate a response." except Exception as e: - log.error(f"Upstream LLM call failed: {e}") + log.error(f"OpenRouter LLM call failed: {e}") return f"I encountered an error: {str(e)}" else: @@ -663,7 +653,7 @@ async def generate_response( if msg.role == "user" and msg.content: user_msg = msg.content break - return f"Demo mode response. Your question: {user_msg[:100]}... Configure ZAI_API_KEY for full functionality." + return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality." # ============================================================================= @@ -880,7 +870,7 @@ async def health_check(): "uptime": time.time() - state.startup_time, "rag_enabled": state.rag_system is not None, "tools_enabled": state.tool_manager is not None, - "llm_connected": state.zai_client is not None, + "llm_connected": state.llm_client is not None, } @@ -890,7 +880,7 @@ async def root(): return { "name": "DocRAG API", "version": "1.0.0", - "description": "OpenAI-compatible RAG server powered by GLM-4.7-Flash. Auto-downloads and analyzes websites when users ask about them.", + "description": "OpenAI-compatible RAG server powered by OpenRouter. 
Auto-downloads and analyzes websites when users ask about them.", "endpoints": { "chat": "/v1/chat/completions", "models": "/v1/models", diff --git a/requirements.txt b/requirements.txt index e948661..bd3b481 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ fastapi~=0.115.0 uvicorn[standard]~=0.32.0 pydantic~=2.10.0 python-multipart~=0.0.20 +python-dotenv~=1.0.0 # HTTP and async aiohttp~=3.11.0 @@ -18,9 +19,8 @@ urllib3~=2.5.0 PyMuPDF~=1.25.0 python-docx~=1.1.0 -# Optional: For using z-ai-web-dev-sdk with GLM-4.7-Flash -# Uncomment the following line if you have access to the SDK -# z-ai-web-dev-sdk>=1.0.0 +# OpenAI SDK, used as the client for OpenRouter's OpenAI-compatible API +openai~=1.0.0 # Vector store alternatives (uncomment as needed) # chromadb~=0.5.0