Fix: Prevent website re-downloads and skip automated tasks

- Skip website download for Open WebUI automated tasks (title, tags, follow-ups)
- Check whether the site is already downloaded before re-downloading
- Return cached site info if previously downloaded
- Reduce unnecessary network calls and processing time
2026-03-29 16:54:38 +00:00 · 2026-03-29 16:54:38 +00:00 · ac0eff1cdd
commit ac0eff1cdd
parent d966f8ea5d
1 changed file with 23 additions and 0 deletions
--- a/main.py
+++ b/main.py
@@ -356,6 +356,16 @@ def should_download_website(message: str, urls: list[str]) -> bool:
    
    message_lower = message.lower()
    
+    # Skip if this is an automated Open WebUI task (not a real user request)
+    automated_task_indicators = [
+        '### task:', 'generate a concise', 'generate 1-3', 
+        'suggest 3-5 relevant follow-up', 'summarizing the chat history',
+        'categorizing the main themes', 'follow-up questions or prompts',
+    ]
+    if any(indicator in message_lower for indicator in automated_task_indicators):
+        log.info("Skipping website download - appears to be an automated task")
+        return False
+    
    # Keywords indicating user wants website content
    access_keywords = [
        'go to', 'visit', 'check', 'look at', 'browse', 'open',
@@ -391,6 +401,19 @@ async def download_website_if_needed(user_message: str) -> dict[str, Any]:
    
    for url in urls:
        try:
+            # Check if site is already downloaded
+            site_info = state.rag_system.get_site_info(url)
+            if site_info:
+                log.info(f"Site already downloaded: {url} ({site_info.get('chunk_count', 0)} chunks)")
+                return {
+                    "downloaded": True,
+                    "url": url,
+                    "chunks": site_info.get("chunk_count", 0),
+                    "pages": site_info.get("page_count", 0),
+                    "local_path": site_info.get("local_path"),
+                    "cached": True,
+                }
+            
            log.info(f"Auto-downloading website: {url}")
            result = await state.rag_system.download_and_ingest_website(
                url=url,