Fix: Prevent website re-downloads and skip automated tasks

- Skip website download for Open WebUI automated tasks (title, tags, follow-ups)
- Check whether the site is already downloaded before downloading it again
- Return cached site info if previously downloaded
- Reduce unnecessary network calls and processing time
This commit is contained in:
Z User 2026-03-29 16:54:38 +00:00
parent d966f8ea5d
commit ac0eff1cdd

23
main.py
View File

@ -356,6 +356,16 @@ def should_download_website(message: str, urls: list[str]) -> bool:
message_lower = message.lower()
# Skip if this is an automated Open WebUI task (not a real user request)
automated_task_indicators = [
'### task:', 'generate a concise', 'generate 1-3',
'suggest 3-5 relevant follow-up', 'summarizing the chat history',
'categorizing the main themes', 'follow-up questions or prompts',
]
if any(indicator in message_lower for indicator in automated_task_indicators):
log.info("Skipping website download - appears to be an automated task")
return False
# Keywords indicating user wants website content
access_keywords = [
'go to', 'visit', 'check', 'look at', 'browse', 'open',
@ -391,6 +401,19 @@ async def download_website_if_needed(user_message: str) -> dict[str, Any]:
for url in urls:
try:
# Check if site is already downloaded
site_info = state.rag_system.get_site_info(url)
if site_info:
log.info(f"Site already downloaded: {url} ({site_info.get('chunk_count', 0)} chunks)")
return {
"downloaded": True,
"url": url,
"chunks": site_info.get("chunk_count", 0),
"pages": site_info.get("page_count", 0),
"local_path": site_info.get("local_path"),
"cached": True,
}
log.info(f"Auto-downloading website: {url}")
result = await state.rag_system.download_and_ingest_website(
url=url,