Update README

2025-11-22 18:40:14 +00:00
9 changed files with 63 additions and 103 deletions
@@ -1,22 +1,3 @@
 ## v0.46.1 (2025-11-22)
 ### Perf
 - Optimize vector viz search performance
 ## v0.46.0 (2025-11-22)
 ### Feat
 - Add Smithery CLI deployment support
 - Implement ADR-016 Smithery stateless deployment mode
 ### Fix
 - **smithery**: Add JSON Schema metadata to mcp-config endpoint
 - **smithery**: Use container runtime pattern for config discovery
 - Add Smithery lifespan and auth mode detection
 ## v0.45.0 (2025-11-22)
 ### Feat
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.46.1
+version: 0.45.0
-appVersion: "0.46.1"
+appVersion: "0.45.0"
 keywords:
  - nextcloud
  - mcp
@@ -218,41 +218,71 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
                }
            )
-        # Fetch vectors for specific matching chunks from Qdrant using batch retrieve
+        # Fetch vectors for specific matching chunks from Qdrant
        vector_fetch_start = time.perf_counter()
        qdrant_client = await get_qdrant_client()
        # Build filters for each specific chunk
        from qdrant_client.models import FieldCondition, Filter, MatchValue
        chunk_vectors_map = {}  # Map (doc_id, chunk_start, chunk_end) -> vector
-        # Collect point IDs from search results for batch retrieval
+        # Fetch vectors in batches by filtering on chunk-specific fields
-        # point_id is the Qdrant internal ID returned by search algorithms
+        for result in search_results:
-        point_ids = [r.point_id for r in search_results if r.point_id]
+            chunk_start = result.chunk_start_offset
            chunk_end = result.chunk_end_offset
-        if point_ids:
+            # Build filter for this specific chunk
-            # Single batch retrieve call instead of N sequential scroll calls
+            must_conditions = [
-            # This is ~50x faster for 50 results (1 HTTP request vs 50)
+                get_placeholder_filter(),  # Always exclude placeholders from user-facing queries
-            points_response = await qdrant_client.retrieve(
+                FieldCondition(
                    key="doc_id",
                    match=MatchValue(value=result.id),
                ),
                FieldCondition(
                    key="user_id",
                    match=MatchValue(value=username),
                ),
            ]
            # Add chunk position filters if available
            if chunk_start is not None:
                must_conditions.append(
                    FieldCondition(
                        key="chunk_start_offset",
                        match=MatchValue(value=chunk_start),
                    )
                )
            if chunk_end is not None:
                must_conditions.append(
                    FieldCondition(
                        key="chunk_end_offset",
                        match=MatchValue(value=chunk_end),
                    )
                )
            # Fetch this specific chunk vector
            points_response = await qdrant_client.scroll(
                collection_name=settings.get_collection_name(),
-                ids=point_ids,
+                scroll_filter=Filter(must=must_conditions),
                limit=1,  # Only need the first match
                with_vectors=["dense"],
-                with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
+                with_payload=False,
            )
-            # Build chunk_vectors_map from batch response
+            points = points_response[0]
-            for point in points_response:
+            if points:
                # Extract dense vector
                point = points[0]
                if point.vector is not None:
-                    # Extract dense vector (handle both named and unnamed vectors)
+                    # If named vectors (dict), extract "dense"
                    if isinstance(point.vector, dict):
                        vector = point.vector.get("dense")
                    else:
                        vector = point.vector
-                    if vector is not None and point.payload:
+                    chunk_key = (result.id, chunk_start, chunk_end)
-                        doc_id = point.payload.get("doc_id")
+                    chunk_vectors_map[chunk_key] = vector
                        chunk_start = point.payload.get("chunk_start_offset")
                        chunk_end = point.payload.get("chunk_end_offset")
                        chunk_key = (doc_id, chunk_start, chunk_end)
                        chunk_vectors_map[chunk_key] = vector
        vector_fetch_duration = time.perf_counter() - vector_fetch_start
@@ -311,23 +341,16 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
        chunk_vectors = np.array(chunk_vectors)
-        # Reuse query embedding from search algorithm (avoids redundant embedding call)
+        # Generate query embedding for visualization
        query_embed_start = time.perf_counter()
-        if search_algo.query_embedding is not None:
+        from nextcloud_mcp_server.embedding.service import get_embedding_service
            query_embedding = search_algo.query_embedding
            logger.info(
                f"Reusing query embedding from search algorithm "
                f"(dimension={len(query_embedding)})"
            )
        else:
            # Fallback: generate embedding if not available from search
            from nextcloud_mcp_server.embedding.service import get_embedding_service
-            embedding_service = get_embedding_service()
+        embedding_service = get_embedding_service()
-            query_embedding = await embedding_service.embed(query)
+        query_embedding = await embedding_service.embed(query)
            logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
        query_embed_duration = time.perf_counter() - query_embed_start
        logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
        # Combine query vector with chunk vectors for PCA
        # Query will be the last point in the array
        all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
@@ -357,19 +380,9 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
        )
        # Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
        # Run in thread pool to avoid blocking the event loop (CPU-bound)
        pca_start = time.perf_counter()
-
+        pca = PCA(n_components=3)
-        def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
+        coords_3d = pca.fit_transform(all_vectors_normalized)
            pca = PCA(n_components=3)
            coords = pca.fit_transform(vectors)
            return coords, pca
        import anyio
        coords_3d, pca = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
            lambda: _compute_pca(all_vectors_normalized)
        )
        pca_duration = time.perf_counter() - pca_start
        # After fit, these attributes are guaranteed to be set
@@ -37,9 +37,7 @@ class BM25SparseEmbeddingProvider:
    def encode(self, text: str) -> dict[str, Any]:
        """
-        Generate BM25 sparse embedding for a single text (synchronous).
+        Generate BM25 sparse embedding for a single text.
        Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
        Args:
            text: Input text to encode
@@ -55,23 +53,6 @@ class BM25SparseEmbeddingProvider:
            "values": sparse_embedding.values.tolist(),
        }
    async def encode_async(self, text: str) -> dict[str, Any]:
        """
        Asynchronously produce the BM25 sparse embedding of one text.

        Delegates to the synchronous encode() on a worker thread so the
        CPU-bound encoding work does not block the event loop.

        Args:
            text: Input text to encode

        Returns:
            Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
        """
        import anyio

        # Pass the bound method and its argument straight to the thread runner;
        # run_sync forwards positional arguments to the callable.
        run_in_worker = anyio.to_thread.run_sync  # type: ignore[attr-defined]
        sparse_vector = await run_in_worker(self.encode, text)
        return sparse_vector
    async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
        """
        Generate BM25 sparse embeddings for multiple texts (batched).
@@ -140,7 +140,6 @@ class SearchResult:
        page_number: Page number for PDF documents (None for other doc types)
        chunk_index: Zero-based index of this chunk in the document
        total_chunks: Total number of chunks in the document
        point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
    """
    id: int
@@ -154,7 +153,6 @@ class SearchResult:
    page_number: int | None = None
    chunk_index: int = 0
    total_chunks: int = 1
    point_id: str | None = None
    def __post_init__(self):
        """Validate score is non-negative.
@@ -174,15 +172,8 @@ class SearchAlgorithm(ABC):
    All search algorithms must implement the search() method with consistent
    interface, allowing them to be used interchangeably.
    Attributes:
        query_embedding: The query embedding generated during the last search.
            Available after search() completes for algorithms that use embeddings.
            Can be reused by callers to avoid redundant embedding generation.
    """
    query_embedding: list[float] | None = None
    @abstractmethod
    async def search(
        self,
@@ -101,13 +101,11 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
        # Generate dense embedding for semantic search
        embedding_service = get_embedding_service()
        dense_embedding = await embedding_service.embed(query)
        # Store for reuse by callers (e.g., viz_routes PCA visualization)
        self.query_embedding = dense_embedding
        logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
        # Generate sparse embedding for BM25 keyword search
        bm25_service = get_bm25_service()
-        sparse_embedding = await bm25_service.encode_async(query)
+        sparse_embedding = bm25_service.encode(query)
        logger.debug(
            f"Generated sparse embedding "
            f"({len(sparse_embedding['indices'])} non-zero terms)"
@@ -220,7 +218,6 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
                    page_number=result.payload.get("page_number"),
                    chunk_index=result.payload.get("chunk_index", 0),
                    total_chunks=result.payload.get("total_chunks", 1),
                    point_id=str(result.id),  # Qdrant point ID for batch retrieval
                )
            )
@@ -78,8 +78,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
        # Generate embedding for query
        embedding_service = get_embedding_service()
        query_embedding = await embedding_service.embed(query)
        # Store for reuse by callers (e.g., viz_routes PCA visualization)
        self.query_embedding = query_embedding
        logger.debug(
            f"Generated embedding for query (dimension={len(query_embedding)})"
        )
@@ -166,7 +164,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
                    page_number=result.payload.get("page_number"),
                    chunk_index=result.payload.get("chunk_index", 0),
                    total_chunks=result.payload.get("total_chunks", 1),
                    point_id=str(result.id),  # Qdrant point ID for batch retrieval
                )
            )
@@ -1,6 +1,6 @@
 [project]
 name = "nextcloud-mcp-server"
-version = "0.46.1"
+version = "0.45.0"
 description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
 authors = [
    {name = "Chris Coutinho", email = "chris@coutinho.io"}
@@ -1936,7 +1936,7 @@ wheels = [
 [[package]]
 name = "nextcloud-mcp-server"
-version = "0.46.1"
+version = "0.45.0"
 source = { editable = "." }
 dependencies = [
    { name = "aiosqlite" },