diff --git a/main.py b/main.py index 31b86ee..aea01ef 100755 --- a/main.py +++ b/main.py @@ -356,6 +356,16 @@ def should_download_website(message: str, urls: list[str]) -> bool: message_lower = message.lower() + # Skip if this is an automated Open WebUI task (not a real user request) + automated_task_indicators = [ + '### task:', 'generate a concise', 'generate 1-3', + 'suggest 3-5 relevant follow-up', 'summarizing the chat history', + 'categorizing the main themes', 'follow-up questions or prompts', + ] + if any(indicator in message_lower for indicator in automated_task_indicators): + log.info("Skipping website download - appears to be an automated task") + return False + # Keywords indicating user wants website content access_keywords = [ 'go to', 'visit', 'check', 'look at', 'browse', 'open', @@ -391,6 +401,19 @@ async def download_website_if_needed(user_message: str) -> dict[str, Any]: for url in urls: try: + # Check if site is already downloaded + site_info = state.rag_system.get_site_info(url) + if site_info: + log.info(f"Site already downloaded: {url} ({site_info.get('chunk_count', 0)} chunks)") + return { + "downloaded": True, + "url": url, + "chunks": site_info.get("chunk_count", 0), + "pages": site_info.get("page_count", 0), + "local_path": site_info.get("local_path"), + "cached": True, + } + log.info(f"Auto-downloading website: {url}") result = await state.rag_system.download_and_ingest_website( url=url,