fix: Increase placeholder staleness threshold to 5x scan interval
- Changed the stale-placeholder threshold from 2x (120s) to 5x (300s) the scan interval (at the default 60s scan interval)
- Large PDFs take 3-4 minutes to process, so a longer threshold is needed
- Prevents premature requeuing of documents that are still in flight
This commit is contained in:
@@ -266,10 +266,11 @@ async def scan_user_documents(
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
# Placeholder exists - check if it's stale (processing may have failed)
|
||||
# Only requeue if placeholder is older than 2x scan interval
|
||||
# Only requeue if placeholder is older than 5x scan interval
|
||||
# (Large PDFs can take 3-4 minutes to process)
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 2
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for note {doc_id} "
|
||||
@@ -460,10 +461,11 @@ async def scan_user_documents(
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
# Placeholder exists - check if it's stale (processing may have failed)
|
||||
# Only requeue if placeholder is older than 2x scan interval
|
||||
# Only requeue if placeholder is older than 5x scan interval
|
||||
# (Large PDFs can take 3-4 minutes to process)
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 2
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for file {file_path} (ID: {file_id}) "
|
||||
|
||||
Reference in New Issue
Block a user