docrag/rag/__init__.py
Z User eabdadfb62 Implement full DocRAG server with OpenAI-compatible API
Features:
- FastAPI server with OpenAI-compatible endpoints (/v1/chat/completions, /v1/models)
- RAG system with document processing and vector storage
- Support for multiple document formats (PDF, DOCX, HTML, text, code)
- Streaming response support
- Tool integration with website_downloader
- Document management API endpoints
- GLM-4.7-Flash integration via z-ai-web-dev-sdk
- Works transparently with Open WebUI and other OpenAI clients

Components:
- main.py: FastAPI application with OpenAI-compatible API
- rag/: RAG system (document processor, vector store, retriever)
- tools/: Tool manager with website_downloader integration
- .env.example: Configuration template
2026-03-29 00:57:37 +00:00

253 lines
7.5 KiB
Python

"""
RAG System - Retrieval Augmented Generation
This module provides the core RAG functionality for DocRAG, including:
- Document processing and chunking
- Vector storage and similarity search
- Context retrieval for enhanced prompts
"""
from __future__ import annotations
import asyncio
import logging
import os
from pathlib import Path
from typing import Any, Optional
from .document_processor import DocumentProcessor
from .vector_store import VectorStore
from .retriever import Retriever
log = logging.getLogger(__name__)
class RAGSystem:
    """
    Main RAG system that coordinates document processing, storage, and retrieval.

    This class provides a unified interface for:
    - Adding documents to the knowledge base
    - Querying for relevant context
    - Managing the document lifecycle

    The underlying components are only created by ``initialize()``; every
    data operation raises ``RuntimeError`` until it has been awaited.
    """

    # Total time budget, in seconds, for fetching a document by URL.
    _URL_FETCH_TIMEOUT_SECONDS = 30

    def __init__(
        self,
        embedding_model: str = "text-embedding-3-small",
        vector_store_path: str = "./data/vectors",
        documents_path: str = "./data/documents",
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ):
        """
        Store the configuration; no directories or components are created yet.

        Args:
            embedding_model: Name of the embedding model passed to the vector store
            vector_store_path: Directory handed to the vector store for persistence
            documents_path: Directory created for document storage
            chunk_size: Chunk size passed to the document processor
            chunk_overlap: Chunk overlap passed to the document processor
        """
        self.embedding_model = embedding_model
        self.vector_store_path = Path(vector_store_path)
        self.documents_path = Path(documents_path)
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self._initialized = False
        self._document_processor: Optional[DocumentProcessor] = None
        self._vector_store: Optional[VectorStore] = None
        self._retriever: Optional[Retriever] = None

    async def initialize(self) -> None:
        """Initialize the RAG system components (no-op if already initialized)."""
        if self._initialized:
            return
        log.info("Initializing RAG system...")

        # Create directories
        self.vector_store_path.mkdir(parents=True, exist_ok=True)
        self.documents_path.mkdir(parents=True, exist_ok=True)

        # Initialize document processor
        self._document_processor = DocumentProcessor(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )

        # Initialize vector store
        self._vector_store = VectorStore(
            persist_directory=str(self.vector_store_path),
            embedding_model=self.embedding_model,
        )

        # Initialize retriever
        self._retriever = Retriever(
            vector_store=self._vector_store,
        )

        # Only flag success once every component has been constructed.
        self._initialized = True
        log.info("RAG system initialized successfully")

    async def close(self) -> None:
        """
        Close the RAG system and release resources.

        Safe to call more than once: component references are dropped so the
        vector store is never closed twice, and the system is marked as
        uninitialized even if closing the vector store raises.
        """
        vector_store = self._vector_store
        try:
            if vector_store:
                await vector_store.close()
        finally:
            self._document_processor = None
            self._vector_store = None
            self._retriever = None
            self._initialized = False
        log.info("RAG system closed")

    def _ensure_initialized(self) -> None:
        """
        Ensure the RAG system is initialized.

        Raises:
            RuntimeError: If ``initialize()`` has not completed successfully
        """
        if not self._initialized:
            raise RuntimeError("RAG system not initialized. Call initialize() first.")

    async def add_document(
        self,
        content: bytes,
        filename: str,
        metadata: Optional[dict[str, Any]] = None,
    ) -> dict[str, Any]:
        """
        Add a document to the knowledge base.

        Args:
            content: Raw document content
            filename: Original filename
            metadata: Optional metadata

        Returns:
            Dictionary with ``chunks`` (number of chunks produced) and
            ``document_id`` (as reported by the document processor)

        Raises:
            RuntimeError: If the system has not been initialized
        """
        self._ensure_initialized()

        # Process document
        doc_info = await self._document_processor.process(
            content=content,
            filename=filename,
            metadata=metadata,
        )

        # Normalise once so a missing or None "chunks" entry counts as zero chunks.
        chunks = doc_info.get("chunks") or []

        # Store chunks in vector store (skipped entirely for empty documents)
        if chunks:
            await self._vector_store.add_chunks(
                chunks=chunks,
                metadatas=doc_info.get("metadatas", []),
                ids=doc_info.get("ids", []),
            )

        log.info("Added document '%s' with %d chunks", filename, len(chunks))
        return {"chunks": len(chunks), "document_id": doc_info.get("document_id")}

    async def add_document_from_url(self, url: str) -> dict[str, Any]:
        """
        Add a document from a URL to the knowledge base.

        Args:
            url: URL to fetch and process

        Returns:
            Dictionary with processing results (see ``add_document``)

        Raises:
            RuntimeError: If the system has not been initialized
            aiohttp.ClientResponseError: If the server answers with an error status
        """
        self._ensure_initialized()

        # Imported lazily so the module can be imported without aiohttp installed.
        import aiohttp
        from urllib.parse import urlparse

        # Fetch content from URL. aiohttp expects a ClientTimeout object;
        # passing a bare number is deprecated.
        timeout = aiohttp.ClientTimeout(total=self._URL_FETCH_TIMEOUT_SECONDS)
        async with aiohttp.ClientSession() as session:
            async with session.get(url, timeout=timeout) as response:
                response.raise_for_status()
                content = await response.read()

        # Extract filename from URL; URLs ending in "/" (or with no path)
        # have an empty basename and fall back to a generic HTML name.
        filename = os.path.basename(urlparse(url).path) or "webpage.html"

        return await self.add_document(
            content=content,
            filename=filename,
            metadata={"source_url": url},
        )

    async def query(
        self,
        query: str,
        top_k: int = 5,
        filter_metadata: Optional[dict] = None,
    ) -> dict[str, Any]:
        """
        Query the knowledge base for relevant context.

        Args:
            query: Query string
            top_k: Number of results to return
            filter_metadata: Optional metadata filters

        Returns:
            Dictionary with:
            - ``context``: results joined by blank lines, each prefixed ``[n]``
            - ``sources``: unique ``metadata["source"]`` values, in the order
              they first appear in the results
            - ``num_results``: number of retrieved results
            - ``results``: the raw results from the retriever

        Raises:
            RuntimeError: If the system has not been initialized
        """
        self._ensure_initialized()

        # Retrieve relevant chunks
        results = await self._retriever.retrieve(
            query=query,
            top_k=top_k,
            filter_metadata=filter_metadata,
        )

        # Build context string
        context_parts: list[str] = []
        sources: list[Any] = []
        for rank, result in enumerate(results, start=1):
            context_parts.append(f"[{rank}] {result['content']}")
            # "metadata" may be absent or explicitly None; treat both as empty.
            source = (result.get("metadata") or {}).get("source")
            if source:
                sources.append(source)

        return {
            "context": "\n\n".join(context_parts),
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the output is deterministic (a set would not be).
            "sources": list(dict.fromkeys(sources)),
            "num_results": len(results),
            "results": results,
        }

    async def list_documents(self) -> list[dict[str, Any]]:
        """
        List all documents in the knowledge base.

        Raises:
            RuntimeError: If the system has not been initialized
        """
        self._ensure_initialized()
        return await self._vector_store.list_documents()

    async def delete_document(self, document_id: str) -> None:
        """
        Delete a document from the knowledge base.

        Args:
            document_id: Identifier passed through to the vector store

        Raises:
            RuntimeError: If the system has not been initialized
        """
        self._ensure_initialized()
        await self._vector_store.delete_document(document_id)
        log.info("Deleted document %s", document_id)
# Global RAG system instance. Only ever set to an instance whose
# initialize() has completed, so readers never observe a half-built system.
_rag_system: Optional[RAGSystem] = None


async def get_rag_system(
    embedding_model: str = "text-embedding-3-small",
    vector_store_path: str = "./data/vectors",
    documents_path: str = "./data/documents",
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
) -> RAGSystem:
    """
    Get or create the global RAG system instance.

    The arguments are only used when the instance is first created; later
    calls return the existing instance regardless of the values passed.

    Args:
        embedding_model: Name of the embedding model
        vector_store_path: Path to vector store
        documents_path: Path to document storage
        chunk_size: Size of document chunks
        chunk_overlap: Overlap between chunks

    Returns:
        Initialized RAGSystem instance
    """
    global _rag_system

    instance = _rag_system
    if instance is None:
        instance = RAGSystem(
            embedding_model=embedding_model,
            vector_store_path=vector_store_path,
            documents_path=documents_path,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

    # initialize() is a no-op on an already-initialized system, so calling it
    # unconditionally also revives an instance that was close()d earlier.
    await instance.initialize()

    # Publish only after successful initialization: if initialize() raises,
    # the global stays unset and the next call retries from scratch instead
    # of handing out an uninitialized instance.
    _rag_system = instance
    return instance