Fix: Prevent website re-downloads and skip automated tasks
- Skip website download for Open WebUI automated tasks (title, tags, follow-ups)
- Check whether the site is already downloaded before re-downloading
- Return cached site info if the site was previously downloaded
- Reduce unnecessary network calls and processing time
This commit is contained in:
parent
d966f8ea5d
commit
ac0eff1cdd
23
main.py
23
main.py
@ -356,6 +356,16 @@ def should_download_website(message: str, urls: list[str]) -> bool:
|
||||
|
||||
message_lower = message.lower()
|
||||
|
||||
# Skip if this is an automated Open WebUI task (not a real user request)
|
||||
automated_task_indicators = [
|
||||
'### task:', 'generate a concise', 'generate 1-3',
|
||||
'suggest 3-5 relevant follow-up', 'summarizing the chat history',
|
||||
'categorizing the main themes', 'follow-up questions or prompts',
|
||||
]
|
||||
if any(indicator in message_lower for indicator in automated_task_indicators):
|
||||
log.info("Skipping website download - appears to be an automated task")
|
||||
return False
|
||||
|
||||
# Keywords indicating user wants website content
|
||||
access_keywords = [
|
||||
'go to', 'visit', 'check', 'look at', 'browse', 'open',
|
||||
@ -391,6 +401,19 @@ async def download_website_if_needed(user_message: str) -> dict[str, Any]:
|
||||
|
||||
for url in urls:
|
||||
try:
|
||||
# Check if site is already downloaded
|
||||
site_info = state.rag_system.get_site_info(url)
|
||||
if site_info:
|
||||
log.info(f"Site already downloaded: {url} ({site_info.get('chunk_count', 0)} chunks)")
|
||||
return {
|
||||
"downloaded": True,
|
||||
"url": url,
|
||||
"chunks": site_info.get("chunk_count", 0),
|
||||
"pages": site_info.get("page_count", 0),
|
||||
"local_path": site_info.get("local_path"),
|
||||
"cached": True,
|
||||
}
|
||||
|
||||
log.info(f"Auto-downloading website: {url}")
|
||||
result = await state.rag_system.download_and_ingest_website(
|
||||
url=url,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user