fix: Reconstruct full content for notes to match indexed offsets
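Notes are indexed as "{title}\n\n{content}" in processor.py, but chunk
expansion was retrieving only the content. Because chunk_start_offset and
chunk_end_offset are computed against the indexed text, they were applied
to a string missing the title prefix and so were misaligned (shifted by
the length of the title plus the two newline characters).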
This fix reconstructs the full content structure when fetching notes
for chunk expansion, ensuring the displayed chunks match the excerpts
shown in search results.
Fixes chunk/excerpt mismatch reported in vector visualization.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -153,7 +153,11 @@ async def _fetch_document_text(
|
||||
if doc_type == "note":
|
||||
# Fetch note by ID
|
||||
note = await nc_client.notes.get_note(note_id=int(doc_id))
|
||||
return note.get("content", "")
|
||||
# Reconstruct full content as indexed: title + "\n\n" + content
|
||||
# This ensures chunk offsets align with indexed content structure
|
||||
title = note.get("title", "")
|
||||
content = note.get("content", "")
|
||||
return f"{title}\n\n{content}"
|
||||
elif doc_type == "file":
|
||||
# Fetch file content via WebDAV
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user