Fix: Prevent website re-downloads and skip automated tasks
- Skip website download for Open WebUI automated tasks (title, tags, follow-ups)
- Check whether the site has already been downloaded before re-downloading
- Return cached site info if the site was previously downloaded
- Reduce unnecessary network calls and processing time
This commit is contained in:
parent
d966f8ea5d
commit
ac0eff1cdd
23
main.py
23
main.py
@ -356,6 +356,16 @@ def should_download_website(message: str, urls: list[str]) -> bool:
|
|||||||
|
|
||||||
message_lower = message.lower()
|
message_lower = message.lower()
|
||||||
|
|
||||||
|
# Skip if this is an automated Open WebUI task (not a real user request)
|
||||||
|
automated_task_indicators = [
|
||||||
|
'### task:', 'generate a concise', 'generate 1-3',
|
||||||
|
'suggest 3-5 relevant follow-up', 'summarizing the chat history',
|
||||||
|
'categorizing the main themes', 'follow-up questions or prompts',
|
||||||
|
]
|
||||||
|
if any(indicator in message_lower for indicator in automated_task_indicators):
|
||||||
|
log.info("Skipping website download - appears to be an automated task")
|
||||||
|
return False
|
||||||
|
|
||||||
# Keywords indicating user wants website content
|
# Keywords indicating user wants website content
|
||||||
access_keywords = [
|
access_keywords = [
|
||||||
'go to', 'visit', 'check', 'look at', 'browse', 'open',
|
'go to', 'visit', 'check', 'look at', 'browse', 'open',
|
||||||
@ -391,6 +401,19 @@ async def download_website_if_needed(user_message: str) -> dict[str, Any]:
|
|||||||
|
|
||||||
for url in urls:
|
for url in urls:
|
||||||
try:
|
try:
|
||||||
|
# Check if site is already downloaded
|
||||||
|
site_info = state.rag_system.get_site_info(url)
|
||||||
|
if site_info:
|
||||||
|
log.info(f"Site already downloaded: {url} ({site_info.get('chunk_count', 0)} chunks)")
|
||||||
|
return {
|
||||||
|
"downloaded": True,
|
||||||
|
"url": url,
|
||||||
|
"chunks": site_info.get("chunk_count", 0),
|
||||||
|
"pages": site_info.get("page_count", 0),
|
||||||
|
"local_path": site_info.get("local_path"),
|
||||||
|
"cached": True,
|
||||||
|
}
|
||||||
|
|
||||||
log.info(f"Auto-downloading website: {url}")
|
log.info(f"Auto-downloading website: {url}")
|
||||||
result = await state.rag_system.download_and_ingest_website(
|
result = await state.rag_system.download_and_ingest_website(
|
||||||
url=url,
|
url=url,
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user