From b5b03bfd78edebf0e4a177717b11b34226edce53 Mon Sep 17 00:00:00 2001
From: Chris Coutinho <chris@coutinho.io>
Date: Sat, 15 Nov 2025 01:19:29 +0100
Subject: [PATCH] feat: Add multi-document Protocol with cross-app search
 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements NextcloudClientProtocol for multi-document type search following
user requirement that document types are not 1:1 with apps (e.g., Notes app
specializes in markdown, while Files/WebDAV handles multiple file types).

Key Changes:
- NextcloudClientProtocol: Generic protocol with app-specific client properties
- get_indexed_doc_types(): Query Qdrant for actually-indexed document types
- Document dispatch: All algorithms check Qdrant before attempting access
- Cross-type deduplication: Semantic search now dedupes on (doc_id, doc_type) tuples, matching hybrid RRF

Search Algorithm Updates:
- Semantic: Added _verify_document_access() with dispatch to appropriate client
  - Deduplication by (doc_id, doc_type) tuple
  - Only "note" verification implemented; file/calendar return None with an info log, unknown types with a warning
- Keyword: Added _fetch_documents() dispatch method
  - Queries Qdrant for available types before fetching
  - Supports cross-app search when doc_type=None
- Fuzzy: Same pattern as keyword search
- Hybrid: Already uses (doc_id, doc_type) for deduplication (only the client type hint changes)

Future-Proof Design:
- File/calendar verification stubs in place
- Clear logging when unsupported types found
- Easy to extend when processor indexes new document types

Currently Supported:
- "note" documents fully implemented and tested
- Other types gracefully handled (logged but skipped)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
---
 nextcloud_mcp_server/search/__init__.py   |   9 +-
 nextcloud_mcp_server/search/algorithms.py | 115 +++++++++++++++++-
 nextcloud_mcp_server/search/fuzzy.py      |  91 ++++++++++----
 nextcloud_mcp_server/search/hybrid.py     |   9 +-
 nextcloud_mcp_server/search/keyword.py    |  98 ++++++++++-----
 nextcloud_mcp_server/search/semantic.py   | 138 ++++++++++++++--------
 6 files changed, 360 insertions(+), 100 deletions(-)

diff --git a/nextcloud_mcp_server/search/__init__.py b/nextcloud_mcp_server/search/__init__.py
index 1da5a84..d6ec32a 100644
--- a/nextcloud_mcp_server/search/__init__.py
+++ b/nextcloud_mcp_server/search/__init__.py
@@ -10,15 +10,22 @@ All algorithms share the same interface and can be used interchangeably by both
 MCP tools and the visualization pane.
 """
 
-from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.search.algorithms import (
+    NextcloudClientProtocol,
+    SearchAlgorithm,
+    SearchResult,
+    get_indexed_doc_types,
+)
 from nextcloud_mcp_server.search.fuzzy import FuzzySearchAlgorithm
 from nextcloud_mcp_server.search.hybrid import HybridSearchAlgorithm
 from nextcloud_mcp_server.search.keyword import KeywordSearchAlgorithm
 from nextcloud_mcp_server.search.semantic import SemanticSearchAlgorithm
 
 __all__ = [
+    "NextcloudClientProtocol",
     "SearchAlgorithm",
     "SearchResult",
+    "get_indexed_doc_types",
     "SemanticSearchAlgorithm",
     "KeywordSearchAlgorithm",
     "FuzzySearchAlgorithm",
diff --git a/nextcloud_mcp_server/search/algorithms.py b/nextcloud_mcp_server/search/algorithms.py
index 560e113..2a7536c 100644
--- a/nextcloud_mcp_server/search/algorithms.py
+++ b/nextcloud_mcp_server/search/algorithms.py
@@ -2,7 +2,120 @@
 
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Any
+from typing import Any, Protocol, runtime_checkable
+
+
+@runtime_checkable  # isinstance() only checks member presence, not types
+class NextcloudClientProtocol(Protocol):
+    """Protocol for Nextcloud client supporting multi-document search.
+
+    This protocol defines the interface that search algorithms need from a
+    Nextcloud client to access documents across different apps (Notes, Files,
+    Calendar, etc.). The client provides access to app-specific sub-clients
+    that handle the actual API calls.
+
+    Document types (e.g., "note", "file", "calendar") are NOT 1:1 with apps.
+    For example, the Notes app specializes in markdown files, while Files/WebDAV
+    handles multiple file types. The abstraction is at the document type level.
+
+    Search algorithms query Qdrant to determine which document types are actually
+    indexed before attempting to access them, enabling graceful cross-app search.
+    """
+
+    username: str
+
+    # App-specific sub-clients the search algorithms dispatch to (all typed Any)
+    @property
+    def notes(self) -> Any:
+        """Notes client for accessing note documents."""
+        ...
+
+    @property
+    def webdav(self) -> Any:
+        """WebDAV client for accessing file documents."""
+        ...
+
+    @property
+    def calendar(self) -> Any:
+        """Calendar client for accessing event/task documents."""
+        ...
+
+    @property
+    def contacts(self) -> Any:
+        """Contacts client for accessing contact card documents."""
+        ...
+
+    @property
+    def deck(self) -> Any:
+        """Deck client for accessing deck card documents."""
+        ...
+
+    @property
+    def cookbook(self) -> Any:
+        """Cookbook client for accessing recipe documents."""
+        ...
+
+    @property
+    def tables(self) -> Any:
+        """Tables client for accessing table row documents."""
+        ...
+
+
+async def get_indexed_doc_types(user_id: str) -> set[str]:
+    """Query Qdrant to get actually-indexed document types for a user.
+
+    This enables search algorithms to check which document types are available
+    before attempting to search/verify them, allowing graceful cross-app search.
+
+    Args:
+        user_id: User ID to filter by
+
+    Returns:
+        Set of doc type strings (e.g., {"note", "file", "calendar"}); empty on error
+
+    Example:
+        >>> types = await get_indexed_doc_types("alice")
+        >>> if "note" in types:
+        ...     # Search notes
+    """
+    import logging
+
+    from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+    from nextcloud_mcp_server.config import get_settings
+    from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+    logger = logging.getLogger(__name__)
+    settings = get_settings()
+
+    qdrant_client = await get_qdrant_client()
+    collection = settings.qdrant_collection
+
+    # Sample one scroll page of points; doc_types seen only beyond it are missed
+    # NOTE(review): Qdrant's facet API (v1.12+) may replace this; confirm server version
+    try:
+        scroll_results, _next_offset = await qdrant_client.scroll(
+            collection_name=collection,
+            scroll_filter=Filter(
+                must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
+            ),
+            limit=1000,  # Sample size to discover types
+            with_payload=["doc_type"],
+            with_vectors=False,  # Don't need vectors for type discovery
+        )
+
+        doc_types = {
+            point.payload.get("doc_type")
+            for point in scroll_results
+            if point.payload.get("doc_type")
+        }
+
+        logger.debug(f"Found indexed document types for user {user_id}: {doc_types}")
+        return doc_types
+
+    except Exception as e:
+        logger.warning(f"Failed to query Qdrant for doc_types: {e}")
+        return set()  # best-effort: callers see "no types indexed"
 
 
 @dataclass
diff --git a/nextcloud_mcp_server/search/fuzzy.py b/nextcloud_mcp_server/search/fuzzy.py
index 479459f..acd57d1 100644
--- a/nextcloud_mcp_server/search/fuzzy.py
+++ b/nextcloud_mcp_server/search/fuzzy.py
@@ -3,8 +3,12 @@
 import logging
 from typing import Any
 
-from nextcloud_mcp_server.client import NextcloudClient
-from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.search.algorithms import (
+    NextcloudClientProtocol,
+    SearchAlgorithm,
+    SearchResult,
+    get_indexed_doc_types,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -38,7 +42,7 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
         user_id: str,
         limit: int = 10,
         doc_type: str | None = None,
-        nextcloud_client: NextcloudClient | None = None,
+        nextcloud_client: NextcloudClientProtocol | None = None,
         **kwargs: Any,
     ) -> list[SearchResult]:
         """Execute fuzzy search using character overlap.
@@ -67,22 +71,39 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
             f"limit={limit}, threshold={threshold}, doc_type={doc_type}"
         )
 
-        # Currently only supports notes
-        if doc_type and doc_type != "note":
-            logger.warning(f"Fuzzy search not yet implemented for doc_type={doc_type}")
-            return []
+        # Get available document types from Qdrant
+        indexed_types = await get_indexed_doc_types(user_id)
+        logger.debug(f"Indexed document types for user: {indexed_types}")
 
-        # Fetch all notes for the user
-        notes = await nextcloud_client.notes.get_notes()
-        logger.debug(f"Fetched {len(notes)} notes for fuzzy search")
+        # Determine which types to search
+        if doc_type:
+            # Search specific type if requested
+            search_types = [doc_type] if doc_type in indexed_types else []
+            if not search_types:
+                logger.info(f"Doc type '{doc_type}' not indexed for user {user_id}")
+                return []
+        else:
+            # Search all indexed types
+            search_types = list(indexed_types)
 
-        # Score and filter notes
-        scored_notes = []
+        # Fetch documents for each type and score them
+        all_documents = []
+        for dtype in search_types:
+            documents = await self._fetch_documents(nextcloud_client, dtype)
+            for doc in documents:
+                doc["_doc_type"] = dtype  # Tag with type
+            all_documents.extend(documents)
+
+        logger.debug(f"Fetched {len(all_documents)} total documents for fuzzy search")
+
+        # Score and filter documents
+        scored_results = []
         query_lower = query.lower()
 
-        for note in notes:
-            title = note.get("title", "")
-            content = note.get("content", "")
+        for doc in all_documents:
+            dtype = doc.get("_doc_type", "note")
+            title = doc.get("title", "")
+            content = doc.get("content", "")
 
             # Check title match
             title_score = self._calculate_char_overlap(query_lower, title.lower())
@@ -100,16 +121,16 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
                 else:
                     excerpt = self._extract_excerpt(content, max_length=200)
 
-                scored_notes.append(
+                scored_results.append(
                     SearchResult(
-                        id=note["id"],
-                        doc_type="note",
+                        id=doc["id"],
+                        doc_type=dtype,
                         title=title or "Untitled",
                         excerpt=excerpt,
                         score=best_score,
                         metadata={
-                            "category": note.get("category", ""),
-                            "modified": note.get("modified"),
+                            "category": doc.get("category", ""),
+                            "modified": doc.get("modified"),
                             "match_location": "title"
                             if title_score >= content_score
                             else "content",
@@ -118,8 +139,8 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
                 )
 
         # Sort by score (descending) and limit
-        scored_notes.sort(key=lambda x: x.score, reverse=True)
-        results = scored_notes[:limit]
+        scored_results.sort(key=lambda x: x.score, reverse=True)
+        results = scored_results[:limit]
 
         logger.info(f"Fuzzy search returned {len(results)} matching notes")
         if results:
@@ -131,6 +152,32 @@ class FuzzySearchAlgorithm(SearchAlgorithm):
 
         return results
 
+    async def _fetch_documents(
+        self, nextcloud_client: NextcloudClientProtocol, doc_type: str
+    ) -> list[dict[str, Any]]:
+        """Fetch documents of a specific type from Nextcloud.
+
+        Args:
+            nextcloud_client: Client for API access; must not be None (used unguarded)
+            doc_type: Document type to fetch ("note", "file", "calendar", etc.)
+
+        Returns:
+            List of dicts with at minimum: id, title, content; [] if type unsupported
+        """
+        if doc_type == "note":
+            return await nextcloud_client.notes.get_notes()
+        elif doc_type == "file":
+            # Future: fetch files when indexed
+            logger.info("File documents not yet supported for fuzzy search")
+            return []
+        elif doc_type == "calendar":
+            # Future: fetch calendar events when indexed
+            logger.info("Calendar documents not yet supported for fuzzy search")
+            return []
+        else:
+            logger.warning(f"Unknown document type '{doc_type}' for fuzzy search")
+            return []
+
     def _calculate_char_overlap(self, query: str, text: str) -> float:
         """Calculate character overlap ratio between query and text.
 
diff --git a/nextcloud_mcp_server/search/hybrid.py b/nextcloud_mcp_server/search/hybrid.py
index a8778c8..947f1f6 100644
--- a/nextcloud_mcp_server/search/hybrid.py
+++ b/nextcloud_mcp_server/search/hybrid.py
@@ -5,8 +5,11 @@ import logging
 from collections import defaultdict
 from typing import Any
 
-from nextcloud_mcp_server.client import NextcloudClient
-from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.search.algorithms import (
+    NextcloudClientProtocol,
+    SearchAlgorithm,
+    SearchResult,
+)
 from nextcloud_mcp_server.search.fuzzy import FuzzySearchAlgorithm
 from nextcloud_mcp_server.search.keyword import KeywordSearchAlgorithm
 from nextcloud_mcp_server.search.semantic import SemanticSearchAlgorithm
@@ -82,7 +85,7 @@ class HybridSearchAlgorithm(SearchAlgorithm):
         user_id: str,
         limit: int = 10,
         doc_type: str | None = None,
-        nextcloud_client: NextcloudClient | None = None,
+        nextcloud_client: NextcloudClientProtocol | None = None,
         **kwargs: Any,
     ) -> list[SearchResult]:
         """Execute hybrid search using RRF to combine algorithms.
diff --git a/nextcloud_mcp_server/search/keyword.py b/nextcloud_mcp_server/search/keyword.py
index 410a7a7..d4e8002 100644
--- a/nextcloud_mcp_server/search/keyword.py
+++ b/nextcloud_mcp_server/search/keyword.py
@@ -3,8 +3,12 @@
 import logging
 from typing import Any
 
-from nextcloud_mcp_server.client import NextcloudClient
-from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.search.algorithms import (
+    NextcloudClientProtocol,
+    SearchAlgorithm,
+    SearchResult,
+    get_indexed_doc_types,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -32,7 +36,7 @@ class KeywordSearchAlgorithm(SearchAlgorithm):
         user_id: str,
         limit: int = 10,
         doc_type: str | None = None,
-        nextcloud_client: NextcloudClient | None = None,
+        nextcloud_client: NextcloudClientProtocol | None = None,
         **kwargs: Any,
     ) -> list[SearchResult]:
         """Execute keyword search using token matching.
@@ -63,52 +67,66 @@ class KeywordSearchAlgorithm(SearchAlgorithm):
         query_tokens = self._process_query(query)
         logger.debug(f"Query tokens: {query_tokens}")
 
-        # Currently only supports notes
-        # TODO: Extend to other document types (files, calendar, etc.)
-        if doc_type and doc_type != "note":
-            logger.warning(
-                f"Keyword search not yet implemented for doc_type={doc_type}"
-            )
-            return []
+        # Get available document types from Qdrant
+        indexed_types = await get_indexed_doc_types(user_id)
+        logger.debug(f"Indexed document types for user: {indexed_types}")
 
-        # Fetch all notes for the user
-        notes = await nextcloud_client.notes.get_notes()
-        logger.debug(f"Fetched {len(notes)} notes for keyword search")
+        # Determine which types to search
+        if doc_type:
+            # Search specific type if requested
+            search_types = [doc_type] if doc_type in indexed_types else []
+            if not search_types:
+                logger.info(f"Doc type '{doc_type}' not indexed for user {user_id}")
+                return []
+        else:
+            # Search all indexed types
+            search_types = list(indexed_types)
 
-        # Score and filter notes
-        scored_notes = []
-        for note in notes:
+        # Fetch documents for each type and score them
+        all_documents = []
+        for dtype in search_types:
+            documents = await self._fetch_documents(nextcloud_client, dtype)
+            for doc in documents:
+                doc["_doc_type"] = dtype  # Tag with type
+            all_documents.extend(documents)
+
+        logger.debug(f"Fetched {len(all_documents)} total documents for keyword search")
+
+        # Score and filter documents
+        scored_results = []
+        for doc in all_documents:
+            dtype = doc.get("_doc_type", "note")
             score = self._calculate_score(
                 query_tokens,
-                note.get("title", ""),
-                note.get("content", ""),
+                doc.get("title", ""),
+                doc.get("content", ""),
             )
 
             if score > 0:  # Only include matches
                 # Extract excerpt with context
                 excerpt = self._extract_excerpt(
-                    note.get("content", ""),
+                    doc.get("content", ""),
                     query_tokens,
                     max_length=200,
                 )
 
-                scored_notes.append(
+                scored_results.append(
                     SearchResult(
-                        id=note["id"],
-                        doc_type="note",
-                        title=note.get("title", "Untitled"),
+                        id=doc["id"],
+                        doc_type=dtype,
+                        title=doc.get("title", "Untitled"),
                         excerpt=excerpt,
                         score=score,
                         metadata={
-                            "category": note.get("category", ""),
-                            "modified": note.get("modified"),
+                            "category": doc.get("category", ""),
+                            "modified": doc.get("modified"),
                         },
                     )
                 )
 
         # Sort by score (descending) and limit
-        scored_notes.sort(key=lambda x: x.score, reverse=True)
-        results = scored_notes[:limit]
+        scored_results.sort(key=lambda x: x.score, reverse=True)
+        results = scored_results[:limit]
 
         logger.info(f"Keyword search returned {len(results)} matching notes")
         if results:
@@ -120,6 +138,32 @@ class KeywordSearchAlgorithm(SearchAlgorithm):
 
         return results
 
+    async def _fetch_documents(
+        self, nextcloud_client: NextcloudClientProtocol, doc_type: str
+    ) -> list[dict[str, Any]]:
+        """Fetch documents of a specific type from Nextcloud.
+
+        Args:
+            nextcloud_client: Client for API access; must not be None (used unguarded)
+            doc_type: Document type to fetch ("note", "file", "calendar", etc.)
+
+        Returns:
+            List of dicts with at minimum: id, title, content; [] if type unsupported
+        """
+        if doc_type == "note":
+            return await nextcloud_client.notes.get_notes()
+        elif doc_type == "file":
+            # Future: fetch files when indexed
+            logger.info("File documents not yet supported for keyword search")
+            return []
+        elif doc_type == "calendar":
+            # Future: fetch calendar events when indexed
+            logger.info("Calendar documents not yet supported for keyword search")
+            return []
+        else:
+            logger.warning(f"Unknown document type '{doc_type}' for keyword search")
+            return []
+
     def _process_query(self, query: str) -> list[str]:
         """Tokenize and normalize query.
 
diff --git a/nextcloud_mcp_server/search/semantic.py b/nextcloud_mcp_server/search/semantic.py
index c6e632d..e38b16d 100644
--- a/nextcloud_mcp_server/search/semantic.py
+++ b/nextcloud_mcp_server/search/semantic.py
@@ -6,11 +6,14 @@ from typing import Any
 from httpx import HTTPStatusError
 from qdrant_client.models import FieldCondition, Filter, MatchValue
 
-from nextcloud_mcp_server.client import NextcloudClient
 from nextcloud_mcp_server.config import get_settings
 from nextcloud_mcp_server.embedding import get_embedding_service
 from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
-from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.search.algorithms import (
+    NextcloudClientProtocol,
+    SearchAlgorithm,
+    SearchResult,
+)
 from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
 
 logger = logging.getLogger(__name__)
@@ -45,7 +48,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
         user_id: str,
         limit: int = 10,
         doc_type: str | None = None,
-        nextcloud_client: NextcloudClient | None = None,
+        nextcloud_client: NextcloudClientProtocol | None = None,
         **kwargs: Any,
     ) -> list[SearchResult]:
         """Execute semantic search using vector similarity.
@@ -144,9 +147,13 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
         self,
         points: list[Any],
         limit: int,
-        nextcloud_client: NextcloudClient | None,
+        nextcloud_client: NextcloudClientProtocol | None,
     ) -> list[SearchResult]:
-        """Deduplicate results by doc_id and verify access.
+        """Deduplicate results by (doc_id, doc_type) and verify access.
+
+        Supports multiple document types with dispatch to appropriate client methods.
+        Deduplication is now by (doc_id, doc_type) tuple to handle cases where
+        the same ID might exist across different document types.
 
         Args:
             points: Qdrant search results
@@ -156,58 +163,32 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
         Returns:
             List of SearchResult objects
         """
-        seen_doc_ids = set()
+        seen_docs = set()  # Track (doc_id, doc_type) tuples
         results = []
 
         for result in points:
             doc_id = int(result.payload["doc_id"])
             doc_type = result.payload.get("doc_type", "note")
+            doc_key = (doc_id, doc_type)
 
             # Skip if we've already seen this document
-            if doc_id in seen_doc_ids:
+            if doc_key in seen_docs:
                 continue
 
-            seen_doc_ids.add(doc_id)
+            seen_docs.add(doc_key)
 
             # Verify access via Nextcloud API if client provided
-            # Currently only supports notes
-            if nextcloud_client and doc_type == "note":
-                try:
-                    note = await nextcloud_client.notes.get_note(doc_id)
+            # Dispatch to appropriate client based on doc_type
+            verified_result = None
 
-                    results.append(
-                        SearchResult(
-                            id=doc_id,
-                            doc_type="note",
-                            title=result.payload["title"],
-                            excerpt=result.payload["excerpt"],
-                            score=result.score,
-                            metadata={
-                                "category": note.get("category", ""),
-                                "chunk_index": result.payload["chunk_index"],
-                                "total_chunks": result.payload["total_chunks"],
-                            },
-                        )
-                    )
+            if nextcloud_client:
+                verified_result = await self._verify_document_access(
+                    nextcloud_client, doc_id, doc_type, result
+                )
 
-                    if len(results) >= limit:
-                        break
-
-                except HTTPStatusError as e:
-                    if e.response.status_code in (403, 404):
-                        # User lost access or document deleted
-                        logger.debug(
-                            f"Skipping note {doc_id}: {e.response.status_code}"
-                        )
-                        continue
-                    else:
-                        # Log other errors but continue processing
-                        logger.warning(
-                            f"Error verifying access to note {doc_id}: "
-                            f"{e.response.status_code}"
-                        )
-                        continue
-            else:
+            if verified_result:
+                results.append(verified_result)
+            elif not nextcloud_client:
                 # No access verification, return result directly
                 results.append(
                     SearchResult(
@@ -223,7 +204,72 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
                     )
                 )
 
-                if len(results) >= limit:
-                    break
+            if len(results) >= limit:
+                break
 
         return results
+
+    async def _verify_document_access(
+        self,
+        nextcloud_client: NextcloudClientProtocol,
+        doc_id: int,
+        doc_type: str,
+        qdrant_result: Any,
+    ) -> SearchResult | None:
+        """Verify user has access to a document via Nextcloud API.
+
+        Dispatches on doc_type; only HTTPStatusError is caught, others propagate.
+
+        Args:
+            nextcloud_client: Client for API access
+            doc_id: Document ID
+            doc_type: Document type ("note", "file", "calendar", etc.)
+            qdrant_result: Original Qdrant search result
+
+        Returns:
+            SearchResult if access verified; None if denied, errored, or unsupported type
+        """
+        try:
+            if doc_type == "note":
+                note = await nextcloud_client.notes.get_note(doc_id)
+                return SearchResult(
+                    id=doc_id,
+                    doc_type="note",
+                    title=qdrant_result.payload["title"],
+                    excerpt=qdrant_result.payload["excerpt"],
+                    score=qdrant_result.score,
+                    metadata={
+                        "category": note.get("category", ""),
+                        "chunk_index": qdrant_result.payload["chunk_index"],
+                        "total_chunks": qdrant_result.payload["total_chunks"],
+                    },
+                )
+            elif doc_type == "file":
+                # Future: verify file access when files are indexed
+                logger.info(
+                    f"File {doc_id} found in search but file verification not yet implemented"
+                )
+                return None
+            elif doc_type == "calendar":
+                # Future: verify calendar access when calendar events are indexed
+                logger.info(
+                    f"Calendar event {doc_id} found in search but calendar verification not yet implemented"
+                )
+                return None
+            else:
+                logger.warning(
+                    f"Unknown document type '{doc_type}' for doc_id {doc_id}"
+                )
+                return None
+
+        except HTTPStatusError as e:
+            if e.response.status_code in (403, 404):
+                # User lost access or document deleted
+                logger.debug(f"Skipping {doc_type} {doc_id}: {e.response.status_code}")
+                return None
+            else:
+                # Log other errors but continue processing
+                logger.warning(
+                    f"Error verifying access to {doc_type} {doc_id}: {e.response.status_code}"
+                )
+                return None