feat: implement semantic search tool and fix vector sync issues (ADR-007 Phase 3)

Completes the ADR-007 implementation by adding user-facing semantic search functionality. Previous phases implemented the scanner and processor for background indexing; this phase adds the query interface.

Changes:
- Add nc_notes_semantic_search MCP tool for natural language queries
- Fix Qdrant point IDs to use UUIDs instead of arbitrary strings (was causing 400 errors)
- Reduce scan interval default from 1 hour to 5 minutes for faster updates
- Add SemanticSearchResult and SemanticSearchNotesResponse models
- Implement dual-phase authorization (Qdrant filter + Nextcloud API verification)

The semantic search enables finding notes by meaning rather than exact keywords, using vector embeddings to understand query intent. The point ID fix resolves a critical bug where all document indexing failed with "invalid point ID" errors.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-08 21:51:12 +01:00
parent 4dbb2eb468
commit fdd82f59e2
4 changed files with 175 additions and 3 deletions
@@ -158,7 +158,7 @@ class Settings:

    # Vector sync settings (ADR-007)
    vector_sync_enabled: bool = False
-    vector_sync_scan_interval: int = 3600  # seconds
+    vector_sync_scan_interval: int = 300  # seconds (5 minutes)
    vector_sync_processor_workers: int = 3
    vector_sync_queue_max_size: int = 10000

@@ -212,7 +212,7 @@ def get_settings() -> Settings:
        vector_sync_enabled=(
            os.getenv("VECTOR_SYNC_ENABLED", "false").lower() == "true"
        ),
-        vector_sync_scan_interval=int(os.getenv("VECTOR_SYNC_SCAN_INTERVAL", "3600")),
+        vector_sync_scan_interval=int(os.getenv("VECTOR_SYNC_SCAN_INTERVAL", "300")),
        vector_sync_processor_workers=int(
            os.getenv("VECTOR_SYNC_PROCESSOR_WORKERS", "3")
        ),
@@ -37,6 +37,18 @@ class NoteSearchResult(BaseModel):
    score: Optional[float] = Field(None, description="Search relevance score")


+class SemanticSearchResult(BaseModel):
+    """Model for semantic search results with additional metadata."""
+
+    # Identity and display metadata of the matched note.
+    id: int = Field(..., description="Note ID")
+    title: str = Field(..., description="Note title")
+    category: str = Field("", description="Note category")
+    # Details of the chunk that produced the match.
+    excerpt: str = Field(..., description="Excerpt from matching chunk")
+    score: float = Field(..., description="Semantic similarity score (0-1)")
+    chunk_index: int = Field(..., description="Index of matching chunk in document")
+    total_chunks: int = Field(..., description="Total number of chunks in document")
+
+
 class NotesSettings(BaseModel):
    """Model for Notes app settings."""

@@ -83,3 +95,16 @@ class SearchNotesResponse(BaseResponse):
    results: List[NoteSearchResult] = Field(description="Search results")
    query: str = Field(description="The search query used")
    total_found: int = Field(description="Total number of notes found")
+
+
+class SemanticSearchNotesResponse(BaseResponse):
+    """Response model for semantic search."""
+
+    # Matching notes, each carrying its similarity score.
+    results: List[SemanticSearchResult] = Field(
+        ..., description="Semantic search results with similarity scores"
+    )
+    # Echo of the caller's query and the size of the result list.
+    query: str = Field(..., description="The search query used")
+    total_found: int = Field(..., description="Total number of notes found")
+    search_method: str = Field(
+        "semantic", description="Search method used (semantic or hybrid)"
+    )
@@ -15,6 +15,8 @@ from nextcloud_mcp_server.models.notes import (
    NoteSearchResult,
    NotesSettings,
    SearchNotesResponse,
+    SemanticSearchNotesResponse,
+    SemanticSearchResult,
    UpdateNoteResponse,
 )

@@ -366,6 +368,145 @@ def configure_notes_tools(mcp: FastMCP):
                    )
                )

+    @mcp.tool()
+    @require_scopes("notes:read")
+    async def nc_notes_semantic_search(
+        query: str, ctx: Context, limit: int = 10, score_threshold: float = 0.7
+    ) -> SemanticSearchNotesResponse:
+        """
+        Semantic search for notes using vector embeddings.
+
+        Searches notes by meaning rather than exact keywords. Requires vector
+        database synchronization to be enabled (VECTOR_SYNC_ENABLED=true).
+
+        Args:
+            query: Natural language search query
+            limit: Maximum number of results to return (default: 10, must be >= 1)
+            score_threshold: Minimum similarity score (0-1, default: 0.7)
+
+        Returns:
+            SemanticSearchNotesResponse with matching notes and similarity scores
+
+        Raises:
+            McpError: If semantic search is disabled, ``limit`` is not positive,
+                the embedding service is not configured, or the search fails.
+        """
+        # Imported lazily so the vector-search dependencies are only loaded
+        # when this tool is actually invoked.
+        from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+        from nextcloud_mcp_server.config import get_settings
+        from nextcloud_mcp_server.embedding import get_embedding_service
+        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+        settings = get_settings()
+
+        # Check if vector sync is enabled
+        if not settings.vector_sync_enabled:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message="Semantic search is not enabled. Set VECTOR_SYNC_ENABLED=true and ensure vector database is configured.",
+                )
+            )
+
+        # Reject a non-positive limit up front instead of spending an embedding
+        # call and a vector-database round-trip on a request that cannot
+        # return anything useful.
+        if limit < 1:
+            raise McpError(
+                ErrorData(code=-1, message="limit must be a positive integer")
+            )
+
+        client = await get_client(ctx)
+        username = client.username
+
+        try:
+            # Generate embedding for query
+            embedding_service = get_embedding_service()
+            query_embedding = await embedding_service.embed(query)
+
+            # Phase 1 of authorization: restrict the vector search to points
+            # owned by the calling user and tagged as notes.
+            qdrant_client = await get_qdrant_client()
+            search_results = await qdrant_client.search(
+                collection_name=settings.qdrant_collection,
+                query_vector=query_embedding,
+                query_filter=Filter(
+                    must=[
+                        FieldCondition(
+                            key="user_id",
+                            match=MatchValue(value=username),
+                        ),
+                        FieldCondition(
+                            key="doc_type",
+                            match=MatchValue(value="note"),
+                        ),
+                    ]
+                ),
+                # Over-fetch: several chunks may belong to the same note and
+                # some hits may be dropped by the access check below.
+                limit=limit * 2,
+                score_threshold=score_threshold,
+                with_payload=True,
+                with_vectors=False,  # Don't return vectors to save bandwidth
+            )
+
+            # Deduplicate by note ID (multiple chunks per note); only the
+            # first hit encountered for each note is kept.
+            seen_note_ids = set()
+            results = []
+
+            for hit in search_results:
+                note_id = int(hit.payload["doc_id"])
+
+                if note_id in seen_note_ids:
+                    continue
+                seen_note_ids.add(note_id)
+
+                # Phase 2 of authorization: confirm via the Nextcloud API that
+                # the note still exists and is readable by this user.
+                try:
+                    note = await client.notes.get_note(note_id)
+                except HTTPStatusError as e:
+                    status = e.response.status_code
+                    # 403: user lost access; 404: note deleted but not yet
+                    # removed from the vector DB. Both are expected and
+                    # skipped silently; anything else is logged and skipped.
+                    if status not in (403, 404):
+                        logger.warning(
+                            "Error verifying access to note %s: %s", note_id, status
+                        )
+                    continue
+
+                results.append(
+                    SemanticSearchResult(
+                        id=note_id,
+                        title=hit.payload["title"],
+                        category=note.get("category", ""),
+                        excerpt=hit.payload["excerpt"],
+                        score=hit.score,
+                        chunk_index=hit.payload["chunk_index"],
+                        total_chunks=hit.payload["total_chunks"],
+                    )
+                )
+
+                if len(results) >= limit:
+                    break
+
+            return SemanticSearchNotesResponse(
+                results=results,
+                query=query,
+                total_found=len(results),
+                search_method="semantic",
+            )
+
+        except ValueError as e:
+            if "No embedding provider configured" in str(e):
+                raise McpError(
+                    ErrorData(
+                        code=-1,
+                        message="Embedding service not configured. Set OLLAMA_BASE_URL environment variable.",
+                    )
+                ) from e
+            raise McpError(
+                ErrorData(code=-1, message=f"Configuration error: {str(e)}")
+            ) from e
+        except RequestError as e:
+            raise McpError(
+                ErrorData(code=-1, message=f"Network error during search: {str(e)}")
+            ) from e
+        except Exception as e:
+            # Last-resort boundary: log with traceback, then surface a
+            # protocol-level error to the MCP caller.
+            logger.error("Semantic search error: %s", e, exc_info=True)
+            raise McpError(
+                ErrorData(code=-1, message=f"Semantic search failed: {str(e)}")
+            ) from e
+
    @mcp.tool()
    @require_scopes("notes:write")
    async def nc_notes_delete_note(note_id: int, ctx: Context) -> DeleteNoteResponse:
@@ -6,6 +6,7 @@ Processes documents from queue: fetches content, generates embeddings, stores in
 import asyncio
 import logging
 import time
+import uuid

 import anyio
 from httpx import HTTPStatusError
@@ -187,9 +188,14 @@ async def _index_document(
    points = []

    for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
+        # Generate deterministic UUID for point ID
+        # Using uuid5 with DNS namespace and combining doc info
+        point_name = f"{doc_task.doc_type}:{doc_task.doc_id}:chunk:{i}"
+        point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, point_name))
+
        points.append(
            PointStruct(
-                id=f"{doc_task.doc_type}_{doc_task.doc_id}_{i}",
+                id=point_id,
                vector=embedding,
                payload={
                    "user_id": doc_task.user_id,