fix(vector): Handle missing 'modified' field in notes gracefully

The vector scanner crashed when encountering notes without a 'modified' field,
causing KeyError and preventing initial sync from completing.

Changes:
- Use dict.get() with fallback value (0) instead of direct key access
- Log warnings for notes missing 'modified' field
- Apply fix to both initial sync and incremental sync code paths

This ensures the scanner continues processing all notes even if some have
missing metadata fields, preventing scanner crashes that could affect
deployment readiness.

Fixes: Notes without 'modified' field causing scanner crash and readiness check failure

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Chris Coutinho
2025-11-09 09:03:05 +01:00
parent 578de4d7d6
commit 7be40a33e1
+17 -3
View File
@@ -105,13 +105,20 @@ async def scan_user_documents(
if initial_sync:
# Send everything on first sync
for note in notes:
# Handle missing 'modified' field (use 0 as fallback)
modified_at = note.get("modified", 0)
if modified_at == 0:
logger.warning(
f"Note {note['id']} missing 'modified' field, using 0 as fallback"
)
await send_stream.send(
DocumentTask(
user_id=user_id,
doc_id=str(note["id"]),
doc_type="note",
operation="index",
modified_at=note["modified"],
modified_at=modified_at,
)
)
logger.info(f"Sent {len(notes)} documents for initial sync: {user_id}")
@@ -147,6 +154,13 @@ async def scan_user_documents(
doc_id = str(note["id"])
indexed_at = indexed_docs.get(doc_id)
# Handle missing 'modified' field (use 0 as fallback)
modified_at = note.get("modified", 0)
if modified_at == 0:
logger.warning(
f"Note {doc_id} missing 'modified' field, using 0 as fallback"
)
# If document reappeared, remove from potentially_deleted
doc_key = (user_id, doc_id)
if doc_key in _potentially_deleted:
@@ -156,14 +170,14 @@ async def scan_user_documents(
del _potentially_deleted[doc_key]
# Send if never indexed or modified since last index
if indexed_at is None or note["modified"] > indexed_at:
if indexed_at is None or modified_at > indexed_at:
await send_stream.send(
DocumentTask(
user_id=user_id,
doc_id=doc_id,
doc_type="note",
operation="index",
modified_at=note["modified"],
modified_at=modified_at,
)
)
queued += 1