From e0320e761c7b416aeb193db053716bd992082ef9 Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Sun, 14 Dec 2025 00:23:12 +0100 Subject: [PATCH] perf(deck): optimize card lookup by storing board_id/stack_id in metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses reviewer feedback on PR #395 about O(n²) performance issue. Changes: - scanner.py: Add metadata field to DocumentTask with board_id/stack_id - scanner.py: Populate metadata during deck card scanning (both initial and incremental sync) - processor.py: Use metadata for O(1) card lookup via get_card() API when available - processor.py: Fallback to iteration for legacy data without metadata - context.py: Add _get_deck_metadata_from_qdrant() helper to retrieve metadata from Qdrant - context.py: Use metadata for fast path lookup in chunk context expansion - context.py: Add user_id parameter to _fetch_document_text() for metadata retrieval Performance Impact: - Before: O(boards × stacks × cards) iteration for each card lookup - After: O(1) direct API call using stored board_id/stack_id - Graceful degradation: Falls back to iteration for legacy data Testing: - All existing integration tests pass (test_deck_vector_search.py) - Type checking passes with no new errors 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Sonnet 4.5 --- nextcloud_mcp_server/search/context.py | 153 ++++++++++++++++++----- nextcloud_mcp_server/vector/processor.py | 135 ++++++++++++-------- nextcloud_mcp_server/vector/scanner.py | 5 + 3 files changed, 209 insertions(+), 84 deletions(-) diff --git a/nextcloud_mcp_server/search/context.py b/nextcloud_mcp_server/search/context.py index 5c12e05..5de6d79 100644 --- a/nextcloud_mcp_server/search/context.py +++ b/nextcloud_mcp_server/search/context.py @@ -209,6 +209,64 @@ async def _get_file_path_from_qdrant( return None +async def _get_deck_metadata_from_qdrant( + user_id: str, card_id: int +) -> dict[str, int] | None: + """Retrieve board_id and stack_id for a deck card from Qdrant payload. + + Args: + user_id: User ID who owns the card + card_id: Card ID + + Returns: + Dictionary with board_id and stack_id, or None if not found + """ + try: + from qdrant_client.models import FieldCondition, Filter, MatchValue + + from nextcloud_mcp_server.config import get_settings + from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client + + qdrant_client = await get_qdrant_client() + settings = get_settings() + + # Query for any chunk of this card (we just need metadata) + scroll_result = await qdrant_client.scroll( + collection_name=settings.get_collection_name(), + scroll_filter=Filter( + must=[ + FieldCondition(key="user_id", match=MatchValue(value=user_id)), + FieldCondition(key="doc_id", match=MatchValue(value=card_id)), + FieldCondition(key="doc_type", match=MatchValue(value="deck_card")), + ] + ), + limit=1, + with_payload=["board_id", "stack_id"], + with_vectors=False, + ) + + if scroll_result[0]: + point = scroll_result[0][0] + board_id = point.payload.get("board_id") + stack_id = point.payload.get("stack_id") + if board_id is not None and stack_id is not None: + logger.debug( + f"Retrieved deck metadata for card {card_id}: " + f"board_id={board_id}, stack_id={stack_id}" + ) + return {"board_id": int(board_id), "stack_id": int(stack_id)} + + logger.debug( + f"Could not find deck metadata in Qdrant for card {card_id} " + f"(might be legacy data without board_id/stack_id)" + ) + return None + + except Exception as e: + logger.debug(f"Error querying Qdrant for deck metadata: {e}") + return None + + @dataclass class ChunkContext: """Expanded chunk with surrounding context and position markers. @@ -394,7 +452,9 @@ async def get_chunk_with_context( logger.debug(f"Resolved file_id {doc_id} to file_path {file_path}") # Fetch full document text - full_text = await _fetch_document_text(nc_client, resolved_doc_id, doc_type) + full_text = await _fetch_document_text( + nc_client, resolved_doc_id, doc_type, user_id + ) if full_text is None: logger.warning( f"Could not fetch document text for {doc_type} {doc_id}, " @@ -453,7 +513,7 @@ async def get_chunk_with_context( async def _fetch_document_text( - nc_client: NextcloudClient, doc_id: str | int, doc_type: str + nc_client: NextcloudClient, doc_id: str | int, doc_type: str, user_id: str ) -> str | None: """Fetch full text content of a document. @@ -546,44 +606,71 @@ async def _fetch_document_text( return "\n".join(content_parts) elif doc_type == "deck_card": # Fetch card from Deck API - # Note: Deck API requires board_id and stack_id, but we don't store those - # We need to search through boards to find the card (same as processor.py) - boards = await nc_client.deck.get_boards() - card_found = False + # Try to get board_id/stack_id from Qdrant metadata (O(1) lookup) + # Otherwise fall back to iteration (legacy data) + card = None + deck_metadata = await _get_deck_metadata_from_qdrant(user_id, int(doc_id)) - for board in boards: - if card_found: - break - - # Skip deleted boards (soft delete: deletedAt > 0) - if board.deletedAt > 0: - logger.debug( - f"Skipping deleted board {board.id} while searching for card {doc_id}" + if deck_metadata: + # Fast path: Direct lookup with known board_id/stack_id + board_id = deck_metadata["board_id"] + stack_id = deck_metadata["stack_id"] + try: + card = await nc_client.deck.get_card( + board_id=board_id, stack_id=stack_id, card_id=int(doc_id) + ) + logger.debug( + f"Retrieved deck card {doc_id} using metadata " + f"(board_id={board_id}, stack_id={stack_id})" + ) + except Exception as e: + logger.warning( + f"Failed to fetch card with metadata (board_id={board_id}, " + f"stack_id={stack_id}, card_id={doc_id}): {e}, falling back to iteration" ) - continue - stacks = await nc_client.deck.get_stacks(board.id) + # Fallback: Iterate through all boards/stacks (for legacy data or if fast path failed) + if card is None: + boards = await nc_client.deck.get_boards() + card_found = False - for stack in stacks: + for board in boards: if card_found: break - if stack.cards: - for card in stack.cards: - if card.id == int(doc_id): - # Reconstruct full content as indexed: title + "\n\n" + description - # This ensures chunk offsets align with indexed content structure - content_parts = [card.title] - if card.description: - content_parts.append(card.description) - card_found = True - logger.debug( - f"Found deck card {doc_id} in board {board.id}, stack {stack.id}" - ) - return "\n\n".join(content_parts) - # Card not found (might be archived or deleted) - logger.warning(f"Deck card {doc_id} not found in any board/stack") - return None + # Skip deleted boards (soft delete: deletedAt > 0) + if board.deletedAt > 0: + logger.debug( + f"Skipping deleted board {board.id} while searching for card {doc_id}" + ) + continue + + stacks = await nc_client.deck.get_stacks(board.id) + + for stack in stacks: + if card_found: + break + if stack.cards: + for c in stack.cards: + if c.id == int(doc_id): + card = c + card_found = True + logger.debug( + f"Found deck card {doc_id} in board {board.id}, " + f"stack {stack.id} (fallback iteration)" + ) + break + + if not card_found: + logger.warning(f"Deck card {doc_id} not found in any board/stack") + return None + + # Reconstruct full content as indexed: title + "\n\n" + description + # This ensures chunk offsets align with indexed content structure + content_parts = [card.title] + if card.description: + content_parts.append(card.description) + return "\n\n".join(content_parts) else: logger.warning(f"Unsupported doc_type for context expansion: {doc_type}") return None diff --git a/nextcloud_mcp_server/vector/processor.py b/nextcloud_mcp_server/vector/processor.py index 03751b1..21974d6 100644 --- a/nextcloud_mcp_server/vector/processor.py +++ b/nextcloud_mcp_server/vector/processor.py @@ -314,62 +314,95 @@ async def _index_document( content_type = None elif doc_task.doc_type == "deck_card": # Fetch card from Deck API - # Note: We need board_id and stack_id to fetch the card - # For now, we'll need to get all boards and find the card - # This is not optimal, but Deck API requires board_id and stack_id - boards = await nc_client.deck.get_boards() - card_found = False + # Use metadata from scanner if available (O(1) lookup) + # Otherwise fall back to iteration (legacy data) + card = None + board = None + stack = None - for board in boards: - if card_found: - break - # Skip deleted boards (soft delete: deletedAt > 0) - if board.deletedAt > 0: - continue - stacks = await nc_client.deck.get_stacks(board.id) - for stack in stacks: + if ( + doc_task.metadata + and "board_id" in doc_task.metadata + and "stack_id" in doc_task.metadata + ): + # Fast path: Direct lookup with known board_id/stack_id + board_id = doc_task.metadata["board_id"] + stack_id = doc_task.metadata["stack_id"] + try: + card = await nc_client.deck.get_card( + board_id=int(board_id), + stack_id=int(stack_id), + card_id=int(doc_task.doc_id), + ) + # Fetch board and stack info for metadata + boards = await nc_client.deck.get_boards() + for b in boards: + if b.id == int(board_id): + board = b + stacks = await nc_client.deck.get_stacks(b.id) + for s in stacks: + if s.id == int(stack_id): + stack = s + break + break + except Exception as e: + logger.warning( + f"Failed to fetch card with metadata (board_id={board_id}, stack_id={stack_id}, card_id={doc_task.doc_id}): {e}, falling back to iteration" + ) + + # Fallback: Iterate through all boards/stacks (for legacy data or if fast path failed) + if card is None: + boards = await nc_client.deck.get_boards() + card_found = False + + for b in boards: if card_found: break - if stack.cards: - for card in stack.cards: - if card.id == int(doc_task.doc_id): - # Build content from card title and description - content_parts = [card.title] - if card.description: - content_parts.append(card.description) - content = "\n\n".join(content_parts) - title = card.title + # Skip deleted boards (soft delete: deletedAt > 0) + if b.deletedAt > 0: + continue + stacks = await nc_client.deck.get_stacks(b.id) + for s in stacks: + if card_found: + break + if s.cards: + for c in s.cards: + if c.id == int(doc_task.doc_id): + card = c + board = b + stack = s + card_found = True + break - # Store deck-specific metadata - file_metadata = { - "board_id": board.id, - "board_title": board.title, - "stack_id": stack.id, - "stack_title": stack.title, - "card_type": card.type, - "duedate": ( - card.duedate.isoformat() - if card.duedate - else None - ), - "archived": card.archived, - "owner": ( - card.owner.uid - if hasattr(card.owner, "uid") - else str(card.owner) - ), - } - etag = card.etag or "" - file_path = None - content_bytes = None - content_type = None - card_found = True - break + if not card_found: + raise ValueError( + f"Deck card {doc_task.doc_id} not found in any board/stack" + ) - if not card_found: - raise ValueError( - f"Deck card {doc_task.doc_id} not found in any board/stack" - ) + # Build content from card title and description + content_parts = [card.title] + if card.description: + content_parts.append(card.description) + content = "\n\n".join(content_parts) + title = card.title + + # Store deck-specific metadata + file_metadata = { + "board_id": board.id, + "board_title": board.title, + "stack_id": stack.id, + "stack_title": stack.title, + "card_type": card.type, + "duedate": (card.duedate.isoformat() if card.duedate else None), + "archived": card.archived, + "owner": ( + card.owner.uid if hasattr(card.owner, "uid") else str(card.owner) + ), + } + etag = card.etag or "" + file_path = None + content_bytes = None + content_type = None elif doc_task.doc_type == "file": # For files, doc_id is now the numeric file ID, file_path comes from DocumentTask if not doc_task.file_path: diff --git a/nextcloud_mcp_server/vector/scanner.py b/nextcloud_mcp_server/vector/scanner.py index 4106e3b..822528b 100644 --- a/nextcloud_mcp_server/vector/scanner.py +++ b/nextcloud_mcp_server/vector/scanner.py @@ -36,6 +36,9 @@ class DocumentTask: operation: str # "index" or "delete" modified_at: int file_path: str | None = None # File path for files (when doc_id is file_id) + metadata: dict[str, int | str] | None = ( + None # Additional metadata (e.g., board_id/stack_id for deck_card) + ) # Track documents potentially deleted (grace period before actual deletion) @@ -874,6 +877,7 @@ async def scan_deck_cards( doc_type="deck_card", operation="index", modified_at=modified_at, + metadata={"board_id": board.id, "stack_id": stack.id}, ) ) queued += 1 @@ -921,6 +925,7 @@ async def scan_deck_cards( doc_type="deck_card", operation="index", modified_at=modified_at, + metadata={"board_id": board.id, "stack_id": stack.id}, ) ) queued += 1