fix: Increase placeholder staleness threshold to 5x scan interval

- Changed from 2x (120s) to 5x (300s) scan interval
- Large PDFs can take 3-4 minutes to process, so the staleness threshold needs to be longer
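- Prevents premature requeuing of in-flight documents
- Also changes HealthCheckFilter: its return expression now additionally checks for the "OpenCV not installed. Disabling OCR" message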
This commit is contained in:
Chris Coutinho
2025-11-20 15:36:22 +01:00
parent 47f0b3db9a
commit ec2c274cd9
2 changed files with 17 additions and 12 deletions
@@ -37,14 +37,17 @@ class HealthCheckFilter(logging.Filter):
"""
# Check if the log message contains health check endpoints
message = record.getMessage()
return not any(
endpoint in message
for endpoint in [
"/health/live",
"/health/ready",
"/metrics",
"/app/vector-sync/status",
]
return (
not any(
endpoint in message
for endpoint in [
"/health/live",
"/health/ready",
"/metrics",
"/app/vector-sync/status",
]
)
or "OpenCV not installed. Disabling OCR" in message
)
+6 -4
View File
@@ -266,10 +266,11 @@ async def scan_user_documents(
needs_indexing = True
elif existing_metadata.get("is_placeholder", False):
# Placeholder exists - check if it's stale (processing may have failed)
# Only requeue if placeholder is older than 2x scan interval
# Only requeue if placeholder is older than 5x scan interval
# (Large PDFs can take 3-4 minutes to process)
queued_at = existing_metadata.get("queued_at", 0)
placeholder_age = time.time() - queued_at
stale_threshold = get_settings().vector_sync_scan_interval * 2
stale_threshold = get_settings().vector_sync_scan_interval * 5
if placeholder_age > stale_threshold:
logger.debug(
f"Found stale placeholder for note {doc_id} "
@@ -460,10 +461,11 @@ async def scan_user_documents(
needs_indexing = True
elif existing_metadata.get("is_placeholder", False):
# Placeholder exists - check if it's stale (processing may have failed)
# Only requeue if placeholder is older than 2x scan interval
# Only requeue if placeholder is older than 5x scan interval
# (Large PDFs can take 3-4 minutes to process)
queued_at = existing_metadata.get("queued_at", 0)
placeholder_age = time.time() - queued_at
stale_threshold = get_settings().vector_sync_scan_interval * 2
stale_threshold = get_settings().vector_sync_scan_interval * 5
if placeholder_age > stale_threshold:
logger.debug(
f"Found stale placeholder for file {file_path} (ID: {file_id}) "