diff --git a/.env.example b/.env.example
index 3c15871..0e76044 100644
--- a/.env.example
+++ b/.env.example
@@ -1,19 +1,21 @@
 # DocRAG Configuration
 # Copy this file to .env and fill in your values
 
-# Server Configuration
+# Server settings
 HOST=0.0.0.0
 PORT=8000
 DEBUG=false
 
-# Model Configuration
-MODEL_NAME=DocRAG-GLM-4.7
-UPSTREAM_MODEL=glm-4.7
+# Model settings
+MODEL_NAME=DocRAG
+UPSTREAM_MODEL=openrouter/free
 
-# API Keys
-ZAI_API_KEY=your-zai-api-key-here
+# OpenRouter API settings (without a key the server only returns mock "demo mode" responses)
+# Get your API key from https://openrouter.ai/keys
+OPENROUTER_API_KEY=your_openrouter_api_key_here
+OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 
-# RAG Configuration
+# RAG settings
 EMBEDDING_MODEL=text-embedding-3-small
 VECTOR_STORE_PATH=./data/vectors
 DOCUMENTS_PATH=./data/documents
@@ -21,6 +23,6 @@ CHUNK_SIZE=1000
 CHUNK_OVERLAP=200
 TOP_K_RESULTS=5
 
-# Tool Configuration
+# Tool settings
 ENABLE_TOOLS=true
 MAX_TOOL_ITERATIONS=3
diff --git a/main.py b/main.py
index 01e64d1..c79c877 100644
--- a/main.py
+++ b/main.py
@@ -26,6 +26,10 @@ import uuid
 from contextlib import asynccontextmanager
 from typing import Any, AsyncIterator, Optional
 
+# Load .env before Config is defined: its class attributes call os.getenv() when the class body runs
+from dotenv import load_dotenv
+load_dotenv()
+
 # Configure logging
 logging.basicConfig(
     level=logging.INFO,
@@ -47,12 +51,8 @@ from rag.document_processor import DocumentProcessor
 # Import tools
 from tools import ToolManager, get_tool_manager
 
-# Import SDK for GLM-4.7-Flash
-try:
-    from zai import ZaiClient as ZAI
-except ImportError:
-    ZAI = None
-    log.warning("z-ai-web-dev-sdk not installed. Install with: pip install z-ai-web-dev-sdk")
+# OpenAI SDK async client, pointed at OpenRouter's OpenAI-compatible API via OPENROUTER_BASE_URL
+from openai import AsyncOpenAI
 
 
 # =============================================================================
@@ -68,11 +68,12 @@ class Config:
     DEBUG: bool = os.getenv("DEBUG", "false").lower() == "true"
 
     # Model settings
-    MODEL_NAME: str = os.getenv("MODEL_NAME", "DocRAG-GLM-4.7")
-    UPSTREAM_MODEL: str = os.getenv("UPSTREAM_MODEL", "glm-4.7")
+    MODEL_NAME: str = os.getenv("MODEL_NAME", "DocRAG")
+    UPSTREAM_MODEL: str = os.getenv("UPSTREAM_MODEL", "openrouter/free")
 
-    # API Key for upstream LLM
-    ZAI_API_KEY: str = os.getenv("ZAI_API_KEY", "")
+    # OpenRouter API settings
+    OPENROUTER_API_KEY: str = os.getenv("OPENROUTER_API_KEY", "")
+    OPENROUTER_BASE_URL: str = os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
 
     # RAG settings
     EMBEDDING_MODEL: str = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")
@@ -105,7 +106,7 @@ class ChatMessage(BaseModel):
 
 class ChatCompletionRequest(BaseModel):
     """OpenAI chat completion request format."""
-    model: str = "DocRAG-GLM-4.7"
+    model: str = "DocRAG"
     messages: list[ChatMessage]
     temperature: Optional[float] = 0.7
     top_p: Optional[float] = 1.0
@@ -166,7 +167,7 @@ class AppState:
     """Global application state."""
     rag_system: Optional[RAGSystem] = None
     tool_manager: Optional[ToolManager] = None
-    zai_client: Any = None
+    llm_client: Optional[AsyncOpenAI] = None
     startup_time: float = time.time()
 
 
@@ -206,18 +207,21 @@ async def lifespan(app: FastAPI):
         log.warning(f"Tool manager initialization failed: {e}")
         state.tool_manager = None
 
-    # Initialize ZAI client for upstream LLM
+    # Initialize OpenRouter client for upstream LLM
     try:
-        if config.ZAI_API_KEY and ZAI is not None:
-            log.info("Initializing ZAI client...")
-            state.zai_client = ZAI(api_key=config.ZAI_API_KEY)
-            log.info("ZAI client initialized successfully")
+        if config.OPENROUTER_API_KEY:
+            log.info("Initializing OpenRouter client...")
+            state.llm_client = AsyncOpenAI(
+                api_key=config.OPENROUTER_API_KEY,
+                base_url=config.OPENROUTER_BASE_URL,
+            )
+            log.info(f"OpenRouter client initialized successfully (model: {config.UPSTREAM_MODEL})")
         else:
-            log.warning("No ZAI_API_KEY provided or SDK not installed - using mock responses")
-            state.zai_client = None
+            log.warning("No OPENROUTER_API_KEY provided - using mock responses")
+            state.llm_client = None
     except Exception as e:
-        log.error(f"Failed to initialize ZAI client: {e}")
-        state.zai_client = None
+        log.error(f"Failed to initialize OpenRouter client: {e}")
+        state.llm_client = None
 
     log.info(f"DocRAG server started on {config.HOST}:{config.PORT}")
 
@@ -236,7 +240,7 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(
     title="DocRAG API",
-    description="OpenAI-compatible RAG server powered by GLM-4.7-Flash",
+    description="OpenAI-compatible RAG server powered by OpenRouter",
     version="1.0.0",
     lifespan=lifespan,
 )
@@ -262,7 +266,7 @@ async def list_models():
     return ModelList(
         data=[
             ModelInfo(id=config.MODEL_NAME, owned_by="docrag"),
-            ModelInfo(id="DocRAG-GLM-4.7", owned_by="docrag"),
+            ModelInfo(id="DocRAG", owned_by="docrag"),
         ]
     )
 
@@ -271,7 +275,7 @@ async def list_models():
 @app.get("/models/{model_id}")
 async def get_model(model_id: str):
     """Get model information (OpenAI-compatible)."""
-    if model_id not in [config.MODEL_NAME, "DocRAG-GLM-4.7"]:
+    if model_id not in [config.MODEL_NAME, "DocRAG"]:
         raise HTTPException(status_code=404, detail="Model not found")
     return ModelInfo(id=model_id, owned_by="docrag")
 
@@ -496,26 +500,18 @@ async def stream_chat_completion(
     created = int(time.time())
 
     try:
-        if state.zai_client:
-            # Use actual GLM-4.7-Flash
-            response = state.zai_client.chat.completions.create(
+        if state.llm_client:
+            # Use OpenRouter with streaming
+            stream = await state.llm_client.chat.completions.create(
                 model=config.UPSTREAM_MODEL,
                 messages=[{"role": m.role, "content": m.content} for m in enhanced_messages if m.content],
                 temperature=request.temperature or 0.7,
                 max_tokens=request.max_tokens or 4096,
                 stream=True,
-                thinking={"type": "enabled"},
             )
 
-            for chunk in response:
-                # Handle reasoning content (thinking)
-                if hasattr(chunk.choices[0].delta, 'reasoning_content') and chunk.choices[0].delta.reasoning_content:
-                    # Don't expose thinking to user - this is internal RAG processing
-                    log.debug(f"Thinking: {chunk.choices[0].delta.reasoning_content[:100]}...")
-                    continue
-
-                # Stream actual content
-                if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content:
+            async for chunk in stream:
+                if chunk.choices and chunk.choices[0].delta.content:
                     content = chunk.choices[0].delta.content
                     yield f"data: {json.dumps({
                         'id': request_id,
@@ -551,7 +547,7 @@ async def stream_chat_completion(
                 mock_response += f"Processed {download_info.get('pages')} pages into {download_info.get('chunks')} chunks.\n\n"
             if context:
                 mock_response += f"Based on my knowledge base, here's what I found:\n\n{context[:1000]}...\n\n"
-            mock_response += "\n\n[Demo mode - configure ZAI_API_KEY for full LLM responses]"
+            mock_response += "\n\n[Demo mode - configure OPENROUTER_API_KEY for full LLM responses]"
 
             for char in mock_response:
                 yield f"data: {json.dumps({
@@ -629,31 +625,25 @@ async def generate_response(
     temperature: float = 0.7,
     max_tokens: int = 4096,
 ) -> str:
-    """Generate response using upstream LLM."""
-    if state.zai_client:
+    """Generate response using upstream LLM via OpenRouter."""
+    if state.llm_client:
         try:
-            response = state.zai_client.chat.completions.create(
+            response = await state.llm_client.chat.completions.create(
                 model=config.UPSTREAM_MODEL,
                 messages=[{"role": m.role, "content": m.content} for m in messages if m.content],
                 temperature=temperature,
                 max_tokens=max_tokens,
-                stream=False,
-                thinking={"type": "enabled"},
             )
 
             # Extract content from response
-            content = ""
-            for chunk in response:
-                if hasattr(chunk.choices[0], 'message') and chunk.choices[0].message:
-                    content = chunk.choices[0].message.content or ""
-                    break
-                if hasattr(chunk.choices[0].delta, 'content') and chunk.choices[0].delta.content:
-                    content += chunk.choices[0].delta.content
-
-            return content or "I apologize, but I couldn't generate a response."
+            if response.choices:
+                message_content = response.choices[0].message.content
+                return message_content or "I apologize, but I couldn't generate a response."
+            
+            return "I apologize, but I couldn't generate a response."
 
         except Exception as e:
-            log.error(f"Upstream LLM call failed: {e}")
+            log.error(f"OpenRouter LLM call failed: {e}")
             return f"I encountered an error: {str(e)}"
 
     else:
@@ -663,7 +653,7 @@ async def generate_response(
             if msg.role == "user" and msg.content:
                 user_msg = msg.content
                 break
-        return f"Demo mode response. Your question: {user_msg[:100]}... Configure ZAI_API_KEY for full functionality."
+        return f"Demo mode response. Your question: {user_msg[:100]}... Configure OPENROUTER_API_KEY for full functionality."
 
 
 # =============================================================================
@@ -880,7 +870,7 @@ async def health_check():
         "uptime": time.time() - state.startup_time,
         "rag_enabled": state.rag_system is not None,
         "tools_enabled": state.tool_manager is not None,
-        "llm_connected": state.zai_client is not None,
+        "llm_connected": state.llm_client is not None,
     }
 
 
@@ -890,7 +880,7 @@ async def root():
     return {
         "name": "DocRAG API",
         "version": "1.0.0",
-        "description": "OpenAI-compatible RAG server powered by GLM-4.7-Flash. Auto-downloads and analyzes websites when users ask about them.",
+        "description": "OpenAI-compatible RAG server powered by OpenRouter. Auto-downloads and analyzes websites when users ask about them.",
         "endpoints": {
             "chat": "/v1/chat/completions",
             "models": "/v1/models",
diff --git a/requirements.txt b/requirements.txt
index e948661..bd3b481 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,7 @@ fastapi~=0.115.0
 uvicorn[standard]~=0.32.0
 pydantic~=2.10.0
 python-multipart~=0.0.20
+python-dotenv~=1.0.0
 
 # HTTP and async
 aiohttp~=3.11.0
@@ -18,9 +19,8 @@ urllib3~=2.5.0
 PyMuPDF~=1.25.0
 python-docx~=1.1.0
 
-# Optional: For using z-ai-web-dev-sdk with GLM-4.7-Flash
-# Uncomment the following line if you have access to the SDK
-# z-ai-web-dev-sdk>=1.0.0
+# LLM API client (for OpenRouter)
+openai>=1.55.3,<2.0  # 1.x API; releases before 1.55.3 pass `proxies=` to httpx and fail on httpx>=0.28
 
 # Vector store alternatives (uncomment as needed)
 # chromadb~=0.5.0