fix: Increase placeholder staleness threshold to 5x scan interval
- Changed from 2x (120s) to 5x (300s) scan interval - Large PDFs take 3-4 minutes to process, need longer threshold - Prevents premature requeuing of in-flight documents
This commit is contained in:
@@ -37,14 +37,17 @@ class HealthCheckFilter(logging.Filter):
|
||||
"""
|
||||
# Check if the log message contains health check endpoints
|
||||
message = record.getMessage()
|
||||
return not any(
|
||||
endpoint in message
|
||||
for endpoint in [
|
||||
"/health/live",
|
||||
"/health/ready",
|
||||
"/metrics",
|
||||
"/app/vector-sync/status",
|
||||
]
|
||||
return (
|
||||
not any(
|
||||
endpoint in message
|
||||
for endpoint in [
|
||||
"/health/live",
|
||||
"/health/ready",
|
||||
"/metrics",
|
||||
"/app/vector-sync/status",
|
||||
]
|
||||
)
|
||||
or "OpenCV not installed. Disabling OCR" in message
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -266,10 +266,11 @@ async def scan_user_documents(
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
# Placeholder exists - check if it's stale (processing may have failed)
|
||||
# Only requeue if placeholder is older than 2x scan interval
|
||||
# Only requeue if placeholder is older than 5x scan interval
|
||||
# (Large PDFs can take 3-4 minutes to process)
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 2
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for note {doc_id} "
|
||||
@@ -460,10 +461,11 @@ async def scan_user_documents(
|
||||
needs_indexing = True
|
||||
elif existing_metadata.get("is_placeholder", False):
|
||||
# Placeholder exists - check if it's stale (processing may have failed)
|
||||
# Only requeue if placeholder is older than 2x scan interval
|
||||
# Only requeue if placeholder is older than 5x scan interval
|
||||
# (Large PDFs can take 3-4 minutes to process)
|
||||
queued_at = existing_metadata.get("queued_at", 0)
|
||||
placeholder_age = time.time() - queued_at
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 2
|
||||
stale_threshold = get_settings().vector_sync_scan_interval * 5
|
||||
if placeholder_age > stale_threshold:
|
||||
logger.debug(
|
||||
f"Found stale placeholder for file {file_path} (ID: {file_id}) "
|
||||
|
||||
Reference in New Issue
Block a user