fix: Reconstruct full content for notes to match indexed offsets

Notes are indexed as "{title}\n\n{content}" in processor.py, but chunk expansion fetched only the note's content, so chunk_start_offset and chunk_end_offset were applied to a different string than the one they were computed against. This fix reconstructs the full indexed content structure when fetching notes for chunk expansion, ensuring the displayed chunks match the excerpts shown in search results.

Fixes chunk/excerpt mismatch reported in vector visualization.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-20 11:33:12 +01:00
parent 327d843f64
commit f1610bbd2e
1 changed file with 5 additions and 1 deletion
@@ -153,7 +153,11 @@ async def _fetch_document_text(
        if doc_type == "note":
            # Fetch note by ID
            note = await nc_client.notes.get_note(note_id=int(doc_id))
-            return note.get("content", "")
+            # Reconstruct full content as indexed: title + "\n\n" + content
+            # This ensures chunk offsets align with indexed content structure
+            title = note.get("title", "")
+            content = note.get("content", "")
+            return f"{title}\n\n{content}"
        elif doc_type == "file":
            # Fetch file content via WebDAV
            try: