Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 54fdc8addc | |||
| e0320e761c |
@@ -209,6 +209,64 @@ async def _get_file_path_from_qdrant(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
async def _get_deck_metadata_from_qdrant(
    user_id: str, card_id: int
) -> dict[str, int] | None:
    """Look up the Deck board and stack IDs stored with an indexed card.

    Scrolls the configured Qdrant collection for a single point whose
    payload matches the given user, card ID and the ``deck_card`` document
    type, requesting only the ``board_id`` and ``stack_id`` payload fields.

    Args:
        user_id: Value matched against the ``user_id`` payload field.
        card_id: Value matched against the ``doc_id`` payload field.

    Returns:
        ``{"board_id": int, "stack_id": int}`` when a matching point carries
        both fields. ``None`` when no point matches, when either field is
        absent, or when any exception is raised along the way (the exception
        is logged at debug level and swallowed).
    """
    try:
        # Imports are kept inside the ``try`` so an import failure is handled
        # like any other lookup error.
        from qdrant_client.models import FieldCondition, Filter, MatchValue

        from nextcloud_mcp_server.config import get_settings
        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

        client = await get_qdrant_client()
        config = get_settings()
        collection = config.get_collection_name()

        # All three conditions must hold for a point to match.
        criteria = (
            ("user_id", user_id),
            ("doc_id", card_id),
            ("doc_type", "deck_card"),
        )
        card_filter = Filter(
            must=[
                FieldCondition(key=field, match=MatchValue(value=expected))
                for field, expected in criteria
            ]
        )

        # One point is enough: only the payload metadata is read, never the
        # vector or the chunk contents.
        page = await client.scroll(
            collection_name=collection,
            scroll_filter=card_filter,
            limit=1,
            with_payload=["board_id", "stack_id"],
            with_vectors=False,
        )

        # The first element of the scroll result holds the matched points.
        if page[0]:
            payload = page[0][0].payload
            board_id = payload.get("board_id")
            stack_id = payload.get("stack_id")
            if not (board_id is None or stack_id is None):
                logger.debug(
                    f"Retrieved deck metadata for card {card_id}: "
                    f"board_id={board_id}, stack_id={stack_id}"
                )
                return {"board_id": int(board_id), "stack_id": int(stack_id)}

        # Either nothing matched or the point lacks one of the two fields.
        logger.debug(
            f"Could not find deck metadata in Qdrant for card {card_id} "
            "(might be legacy data without board_id/stack_id)"
        )
        return None

    except Exception as exc:
        # Best-effort lookup: every failure is reported as "no metadata".
        logger.debug(f"Error querying Qdrant for deck metadata: {exc}")
        return None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ChunkContext:
|
class ChunkContext:
|
||||||
"""Expanded chunk with surrounding context and position markers.
|
"""Expanded chunk with surrounding context and position markers.
|
||||||
@@ -394,7 +452,9 @@ async def get_chunk_with_context(
|
|||||||
logger.debug(f"Resolved file_id {doc_id} to file_path {file_path}")
|
logger.debug(f"Resolved file_id {doc_id} to file_path {file_path}")
|
||||||
|
|
||||||
# Fetch full document text
|
# Fetch full document text
|
||||||
full_text = await _fetch_document_text(nc_client, resolved_doc_id, doc_type)
|
full_text = await _fetch_document_text(
|
||||||
|
nc_client, resolved_doc_id, doc_type, user_id
|
||||||
|
)
|
||||||
if full_text is None:
|
if full_text is None:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Could not fetch document text for {doc_type} {doc_id}, "
|
f"Could not fetch document text for {doc_type} {doc_id}, "
|
||||||
@@ -453,7 +513,7 @@ async def get_chunk_with_context(
|
|||||||
|
|
||||||
|
|
||||||
async def _fetch_document_text(
|
async def _fetch_document_text(
|
||||||
nc_client: NextcloudClient, doc_id: str | int, doc_type: str
|
nc_client: NextcloudClient, doc_id: str | int, doc_type: str, user_id: str
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
"""Fetch full text content of a document.
|
"""Fetch full text content of a document.
|
||||||
|
|
||||||
@@ -546,44 +606,71 @@ async def _fetch_document_text(
|
|||||||
return "\n".join(content_parts)
|
return "\n".join(content_parts)
|
||||||
elif doc_type == "deck_card":
|
elif doc_type == "deck_card":
|
||||||
# Fetch card from Deck API
|
# Fetch card from Deck API
|
||||||
# Note: Deck API requires board_id and stack_id, but we don't store those
|
# Try to get board_id/stack_id from Qdrant metadata (O(1) lookup)
|
||||||
# We need to search through boards to find the card (same as processor.py)
|
# Otherwise fall back to iteration (legacy data)
|
||||||
boards = await nc_client.deck.get_boards()
|
card = None
|
||||||
card_found = False
|
deck_metadata = await _get_deck_metadata_from_qdrant(user_id, int(doc_id))
|
||||||
|
|
||||||
for board in boards:
|
if deck_metadata:
|
||||||
if card_found:
|
# Fast path: Direct lookup with known board_id/stack_id
|
||||||
break
|
board_id = deck_metadata["board_id"]
|
||||||
|
stack_id = deck_metadata["stack_id"]
|
||||||
# Skip deleted boards (soft delete: deletedAt > 0)
|
try:
|
||||||
if board.deletedAt > 0:
|
card = await nc_client.deck.get_card(
|
||||||
logger.debug(
|
board_id=board_id, stack_id=stack_id, card_id=int(doc_id)
|
||||||
f"Skipping deleted board {board.id} while searching for card {doc_id}"
|
)
|
||||||
|
logger.debug(
|
||||||
|
f"Retrieved deck card {doc_id} using metadata "
|
||||||
|
f"(board_id={board_id}, stack_id={stack_id})"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Failed to fetch card with metadata (board_id={board_id}, "
|
||||||
|
f"stack_id={stack_id}, card_id={doc_id}): {e}, falling back to iteration"
|
||||||
)
|
)
|
||||||
continue
|
|
||||||
|
|
||||||
stacks = await nc_client.deck.get_stacks(board.id)
|
# Fallback: Iterate through all boards/stacks (for legacy data or if fast path failed)
|
||||||
|
if card is None:
|
||||||
|
boards = await nc_client.deck.get_boards()
|
||||||
|
card_found = False
|
||||||
|
|
||||||
for stack in stacks:
|
for board in boards:
|
||||||
if card_found:
|
if card_found:
|
||||||
break
|
break
|
||||||
if stack.cards:
|
|
||||||
for card in stack.cards:
|
|
||||||
if card.id == int(doc_id):
|
|
||||||
# Reconstruct full content as indexed: title + "\n\n" + description
|
|
||||||
# This ensures chunk offsets align with indexed content structure
|
|
||||||
content_parts = [card.title]
|
|
||||||
if card.description:
|
|
||||||
content_parts.append(card.description)
|
|
||||||
card_found = True
|
|
||||||
logger.debug(
|
|
||||||
f"Found deck card {doc_id} in board {board.id}, stack {stack.id}"
|
|
||||||
)
|
|
||||||
return "\n\n".join(content_parts)
|
|
||||||
|
|
||||||
# Card not found (might be archived or deleted)
|
# Skip deleted boards (soft delete: deletedAt > 0)
|
||||||
logger.warning(f"Deck card {doc_id} not found in any board/stack")
|
if board.deletedAt > 0:
|
||||||
return None
|
logger.debug(
|
||||||
|
f"Skipping deleted board {board.id} while searching for card {doc_id}"
|
||||||
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
stacks = await nc_client.deck.get_stacks(board.id)
|
||||||
|
|
||||||
|
for stack in stacks:
|
||||||
|
if card_found:
|
||||||
|
break
|
||||||
|
if stack.cards:
|
||||||
|
for c in stack.cards:
|
||||||
|
if c.id == int(doc_id):
|
||||||
|
card = c
|
||||||
|
card_found = True
|
||||||
|
logger.debug(
|
||||||
|
f"Found deck card {doc_id} in board {board.id}, "
|
||||||
|
f"stack {stack.id} (fallback iteration)"
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
if not card_found:
|
||||||
|
logger.warning(f"Deck card {doc_id} not found in any board/stack")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Reconstruct full content as indexed: title + "\n\n" + description
|
||||||
|
# This ensures chunk offsets align with indexed content structure
|
||||||
|
content_parts = [card.title]
|
||||||
|
if card.description:
|
||||||
|
content_parts.append(card.description)
|
||||||
|
return "\n\n".join(content_parts)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Unsupported doc_type for context expansion: {doc_type}")
|
logger.warning(f"Unsupported doc_type for context expansion: {doc_type}")
|
||||||
return None
|
return None
|
||||||
|
|||||||
@@ -314,62 +314,95 @@ async def _index_document(
|
|||||||
content_type = None
|
content_type = None
|
||||||
elif doc_task.doc_type == "deck_card":
|
elif doc_task.doc_type == "deck_card":
|
||||||
# Fetch card from Deck API
|
# Fetch card from Deck API
|
||||||
# Note: We need board_id and stack_id to fetch the card
|
# Use metadata from scanner if available (O(1) lookup)
|
||||||
# For now, we'll need to get all boards and find the card
|
# Otherwise fall back to iteration (legacy data)
|
||||||
# This is not optimal, but Deck API requires board_id and stack_id
|
card = None
|
||||||
boards = await nc_client.deck.get_boards()
|
board = None
|
||||||
card_found = False
|
stack = None
|
||||||
|
|
||||||
for board in boards:
|
if (
|
||||||
if card_found:
|
doc_task.metadata
|
||||||
break
|
and "board_id" in doc_task.metadata
|
||||||
# Skip deleted boards (soft delete: deletedAt > 0)
|
and "stack_id" in doc_task.metadata
|
||||||
if board.deletedAt > 0:
|
):
|
||||||
continue
|
# Fast path: Direct lookup with known board_id/stack_id
|
||||||
stacks = await nc_client.deck.get_stacks(board.id)
|
board_id = doc_task.metadata["board_id"]
|
||||||
for stack in stacks:
|
stack_id = doc_task.metadata["stack_id"]
|
||||||
|
try:
|
||||||
|
card = await nc_client.deck.get_card(
|
||||||
|
board_id=int(board_id),
|
||||||
|
stack_id=int(stack_id),
|
||||||
|
card_id=int(doc_task.doc_id),
|
||||||
|
)
|
||||||
|
# Fetch board and stack info for metadata
|
||||||
|
boards = await nc_client.deck.get_boards()
|
||||||
|
for b in boards:
|
||||||
|
if b.id == int(board_id):
|
||||||
|
board = b
|
||||||
|
stacks = await nc_client.deck.get_stacks(b.id)
|
||||||
|
for s in stacks:
|
||||||
|
if s.id == int(stack_id):
|
||||||
|
stack = s
|
||||||
|
break
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"Failed to fetch card with metadata (board_id={board_id}, stack_id={stack_id}, card_id={doc_task.doc_id}): {e}, falling back to iteration"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Fallback: Iterate through all boards/stacks (for legacy data or if fast path failed)
|
||||||
|
if card is None:
|
||||||
|
boards = await nc_client.deck.get_boards()
|
||||||
|
card_found = False
|
||||||
|
|
||||||
|
for b in boards:
|
||||||
if card_found:
|
if card_found:
|
||||||
break
|
break
|
||||||
if stack.cards:
|
# Skip deleted boards (soft delete: deletedAt > 0)
|
||||||
for card in stack.cards:
|
if b.deletedAt > 0:
|
||||||
if card.id == int(doc_task.doc_id):
|
continue
|
||||||
# Build content from card title and description
|
stacks = await nc_client.deck.get_stacks(b.id)
|
||||||
content_parts = [card.title]
|
for s in stacks:
|
||||||
if card.description:
|
if card_found:
|
||||||
content_parts.append(card.description)
|
break
|
||||||
content = "\n\n".join(content_parts)
|
if s.cards:
|
||||||
title = card.title
|
for c in s.cards:
|
||||||
|
if c.id == int(doc_task.doc_id):
|
||||||
|
card = c
|
||||||
|
board = b
|
||||||
|
stack = s
|
||||||
|
card_found = True
|
||||||
|
break
|
||||||
|
|
||||||
# Store deck-specific metadata
|
if not card_found:
|
||||||
file_metadata = {
|
raise ValueError(
|
||||||
"board_id": board.id,
|
f"Deck card {doc_task.doc_id} not found in any board/stack"
|
||||||
"board_title": board.title,
|
)
|
||||||
"stack_id": stack.id,
|
|
||||||
"stack_title": stack.title,
|
|
||||||
"card_type": card.type,
|
|
||||||
"duedate": (
|
|
||||||
card.duedate.isoformat()
|
|
||||||
if card.duedate
|
|
||||||
else None
|
|
||||||
),
|
|
||||||
"archived": card.archived,
|
|
||||||
"owner": (
|
|
||||||
card.owner.uid
|
|
||||||
if hasattr(card.owner, "uid")
|
|
||||||
else str(card.owner)
|
|
||||||
),
|
|
||||||
}
|
|
||||||
etag = card.etag or ""
|
|
||||||
file_path = None
|
|
||||||
content_bytes = None
|
|
||||||
content_type = None
|
|
||||||
card_found = True
|
|
||||||
break
|
|
||||||
|
|
||||||
if not card_found:
|
# Build content from card title and description
|
||||||
raise ValueError(
|
content_parts = [card.title]
|
||||||
f"Deck card {doc_task.doc_id} not found in any board/stack"
|
if card.description:
|
||||||
)
|
content_parts.append(card.description)
|
||||||
|
content = "\n\n".join(content_parts)
|
||||||
|
title = card.title
|
||||||
|
|
||||||
|
# Store deck-specific metadata
|
||||||
|
file_metadata = {
|
||||||
|
"board_id": board.id,
|
||||||
|
"board_title": board.title,
|
||||||
|
"stack_id": stack.id,
|
||||||
|
"stack_title": stack.title,
|
||||||
|
"card_type": card.type,
|
||||||
|
"duedate": (card.duedate.isoformat() if card.duedate else None),
|
||||||
|
"archived": card.archived,
|
||||||
|
"owner": (
|
||||||
|
card.owner.uid if hasattr(card.owner, "uid") else str(card.owner)
|
||||||
|
),
|
||||||
|
}
|
||||||
|
etag = card.etag or ""
|
||||||
|
file_path = None
|
||||||
|
content_bytes = None
|
||||||
|
content_type = None
|
||||||
elif doc_task.doc_type == "file":
|
elif doc_task.doc_type == "file":
|
||||||
# For files, doc_id is now the numeric file ID, file_path comes from DocumentTask
|
# For files, doc_id is now the numeric file ID, file_path comes from DocumentTask
|
||||||
if not doc_task.file_path:
|
if not doc_task.file_path:
|
||||||
|
|||||||
@@ -36,6 +36,9 @@ class DocumentTask:
|
|||||||
operation: str # "index" or "delete"
|
operation: str # "index" or "delete"
|
||||||
modified_at: int
|
modified_at: int
|
||||||
file_path: str | None = None # File path for files (when doc_id is file_id)
|
file_path: str | None = None # File path for files (when doc_id is file_id)
|
||||||
|
metadata: dict[str, int | str] | None = (
|
||||||
|
None # Additional metadata (e.g., board_id/stack_id for deck_card)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# Track documents potentially deleted (grace period before actual deletion)
|
# Track documents potentially deleted (grace period before actual deletion)
|
||||||
@@ -874,6 +877,7 @@ async def scan_deck_cards(
|
|||||||
doc_type="deck_card",
|
doc_type="deck_card",
|
||||||
operation="index",
|
operation="index",
|
||||||
modified_at=modified_at,
|
modified_at=modified_at,
|
||||||
|
metadata={"board_id": board.id, "stack_id": stack.id},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
queued += 1
|
queued += 1
|
||||||
@@ -921,6 +925,7 @@ async def scan_deck_cards(
|
|||||||
doc_type="deck_card",
|
doc_type="deck_card",
|
||||||
operation="index",
|
operation="index",
|
||||||
modified_at=modified_at,
|
modified_at=modified_at,
|
||||||
|
metadata={"board_id": board.id, "stack_id": stack.id},
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
queued += 1
|
queued += 1
|
||||||
|
|||||||
Reference in New Issue
Block a user