Update README

2025-11-22 18:40:14 +00:00
9 changed files with 63 additions and 103 deletions
@@ -1,22 +1,3 @@
-## v0.46.1 (2025-11-22)
-
-### Perf
-
- Optimize vector viz search performance
-
-## v0.46.0 (2025-11-22)
-
-### Feat
-
- Add Smithery CLI deployment support
- Implement ADR-016 Smithery stateless deployment mode
-
-### Fix
-
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
- **smithery**: Use container runtime pattern for config discovery
- Add Smithery lifespan and auth mode detection
-
 ## v0.45.0 (2025-11-22)

 ### Feat
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.46.1
-appVersion: "0.46.1"
+version: 0.45.0
+appVersion: "0.45.0"
 keywords:
  - nextcloud
  - mcp
@@ -218,41 +218,71 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
                }
            )

-        # Fetch vectors for specific matching chunks from Qdrant using batch retrieve
+        # Fetch vectors for specific matching chunks from Qdrant
        vector_fetch_start = time.perf_counter()
        qdrant_client = await get_qdrant_client()

+        # Build filters for each specific chunk
+        from qdrant_client.models import FieldCondition, Filter, MatchValue
+
        chunk_vectors_map = {}  # Map (doc_id, chunk_start, chunk_end) -> vector

-        # Collect point IDs from search results for batch retrieval
-        # point_id is the Qdrant internal ID returned by search algorithms
-        point_ids = [r.point_id for r in search_results if r.point_id]
+        # Fetch vectors one search result at a time by filtering on chunk-specific fields
+        for result in search_results:
+            chunk_start = result.chunk_start_offset
+            chunk_end = result.chunk_end_offset

-        if point_ids:
-            # Single batch retrieve call instead of N sequential scroll calls
-            # This is ~50x faster for 50 results (1 HTTP request vs 50)
-            points_response = await qdrant_client.retrieve(
+            # Build filter for this specific chunk
+            must_conditions = [
+                get_placeholder_filter(),  # Always exclude placeholders from user-facing queries
+                FieldCondition(
+                    key="doc_id",
+                    match=MatchValue(value=result.id),
+                ),
+                FieldCondition(
+                    key="user_id",
+                    match=MatchValue(value=username),
+                ),
+            ]
+
+            # Add chunk position filters if available
+            if chunk_start is not None:
+                must_conditions.append(
+                    FieldCondition(
+                        key="chunk_start_offset",
+                        match=MatchValue(value=chunk_start),
+                    )
+                )
+            if chunk_end is not None:
+                must_conditions.append(
+                    FieldCondition(
+                        key="chunk_end_offset",
+                        match=MatchValue(value=chunk_end),
+                    )
+                )
+
+            # Fetch this specific chunk vector
+            points_response = await qdrant_client.scroll(
                collection_name=settings.get_collection_name(),
-                ids=point_ids,
+                scroll_filter=Filter(must=must_conditions),
+                limit=1,  # Only need the first match
                with_vectors=["dense"],
-                with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
+                with_payload=False,
            )

-            # Build chunk_vectors_map from batch response
-            for point in points_response:
+            points = points_response[0]
+            if points:
+                # Extract dense vector
+                point = points[0]
                if point.vector is not None:
-                    # Extract dense vector (handle both named and unnamed vectors)
+                    # If named vectors (dict), extract "dense"
                    if isinstance(point.vector, dict):
                        vector = point.vector.get("dense")
                    else:
                        vector = point.vector

-                    if vector is not None and point.payload:
-                        doc_id = point.payload.get("doc_id")
-                        chunk_start = point.payload.get("chunk_start_offset")
-                        chunk_end = point.payload.get("chunk_end_offset")
-                        chunk_key = (doc_id, chunk_start, chunk_end)
-                        chunk_vectors_map[chunk_key] = vector
+                    chunk_key = (result.id, chunk_start, chunk_end)
+                    chunk_vectors_map[chunk_key] = vector

        vector_fetch_duration = time.perf_counter() - vector_fetch_start

@@ -311,23 +341,16 @@ async def vector_visualization_search(request: Request) -> JSONResponse:

        chunk_vectors = np.array(chunk_vectors)

-        # Reuse query embedding from search algorithm (avoids redundant embedding call)
+        # Generate query embedding for visualization
        query_embed_start = time.perf_counter()
-        if search_algo.query_embedding is not None:
-            query_embedding = search_algo.query_embedding
-            logger.info(
-                f"Reusing query embedding from search algorithm "
-                f"(dimension={len(query_embedding)})"
-            )
-        else:
-            # Fallback: generate embedding if not available from search
-            from nextcloud_mcp_server.embedding.service import get_embedding_service
+        from nextcloud_mcp_server.embedding.service import get_embedding_service

-            embedding_service = get_embedding_service()
-            query_embedding = await embedding_service.embed(query)
-            logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
+        embedding_service = get_embedding_service()
+        query_embedding = await embedding_service.embed(query)
        query_embed_duration = time.perf_counter() - query_embed_start

+        logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
+
        # Combine query vector with chunk vectors for PCA
        # Query will be the last point in the array
        all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
@@ -357,19 +380,9 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
        )

        # Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
-        # Run in thread pool to avoid blocking the event loop (CPU-bound)
        pca_start = time.perf_counter()
-
-        def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
-            pca = PCA(n_components=3)
-            coords = pca.fit_transform(vectors)
-            return coords, pca
-
-        import anyio
-
-        coords_3d, pca = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
-            lambda: _compute_pca(all_vectors_normalized)
-        )
+        pca = PCA(n_components=3)
+        coords_3d = pca.fit_transform(all_vectors_normalized)
        pca_duration = time.perf_counter() - pca_start

        # After fit, these attributes are guaranteed to be set
@@ -37,9 +37,7 @@ class BM25SparseEmbeddingProvider:

    def encode(self, text: str) -> dict[str, Any]:
        """
-        Generate BM25 sparse embedding for a single text (synchronous).
-
-        Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
+        Generate BM25 sparse embedding for a single text.

        Args:
            text: Input text to encode
@@ -55,23 +53,6 @@ class BM25SparseEmbeddingProvider:
            "values": sparse_embedding.values.tolist(),
        }

-    async def encode_async(self, text: str) -> dict[str, Any]:
-        """
-        Generate BM25 sparse embedding for a single text (async).
-
-        Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
-
-        Args:
-            text: Input text to encode
-
-        Returns:
-            Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
-        """
-        import anyio
-
-        # Run CPU-bound BM25 encoding in thread pool
-        return await anyio.to_thread.run_sync(lambda: self.encode(text))  # type: ignore[attr-defined]
-
    async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
        """
        Generate BM25 sparse embeddings for multiple texts (batched).
@@ -140,7 +140,6 @@ class SearchResult:
        page_number: Page number for PDF documents (None for other doc types)
        chunk_index: Zero-based index of this chunk in the document
        total_chunks: Total number of chunks in the document
-        point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
    """

    id: int
@@ -154,7 +153,6 @@ class SearchResult:
    page_number: int | None = None
    chunk_index: int = 0
    total_chunks: int = 1
-    point_id: str | None = None

    def __post_init__(self):
        """Validate score is non-negative.
@@ -174,15 +172,8 @@ class SearchAlgorithm(ABC):

    All search algorithms must implement the search() method with consistent
    interface, allowing them to be used interchangeably.
-
-    Attributes:
-        query_embedding: The query embedding generated during the last search.
-            Available after search() completes for algorithms that use embeddings.
-            Can be reused by callers to avoid redundant embedding generation.
    """

-    query_embedding: list[float] | None = None
-
    @abstractmethod
    async def search(
        self,
@@ -101,13 +101,11 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
        # Generate dense embedding for semantic search
        embedding_service = get_embedding_service()
        dense_embedding = await embedding_service.embed(query)
-        # Store for reuse by callers (e.g., viz_routes PCA visualization)
-        self.query_embedding = dense_embedding
        logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")

        # Generate sparse embedding for BM25 keyword search
        bm25_service = get_bm25_service()
-        sparse_embedding = await bm25_service.encode_async(query)
+        sparse_embedding = bm25_service.encode(query)
        logger.debug(
            f"Generated sparse embedding "
            f"({len(sparse_embedding['indices'])} non-zero terms)"
@@ -220,7 +218,6 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
                    page_number=result.payload.get("page_number"),
                    chunk_index=result.payload.get("chunk_index", 0),
                    total_chunks=result.payload.get("total_chunks", 1),
-                    point_id=str(result.id),  # Qdrant point ID for batch retrieval
                )
            )

@@ -78,8 +78,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
        # Generate embedding for query
        embedding_service = get_embedding_service()
        query_embedding = await embedding_service.embed(query)
-        # Store for reuse by callers (e.g., viz_routes PCA visualization)
-        self.query_embedding = query_embedding
        logger.debug(
            f"Generated embedding for query (dimension={len(query_embedding)})"
        )
@@ -166,7 +164,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
                    page_number=result.payload.get("page_number"),
                    chunk_index=result.payload.get("chunk_index", 0),
                    total_chunks=result.payload.get("total_chunks", 1),
-                    point_id=str(result.id),  # Qdrant point ID for batch retrieval
                )
            )

@@ -1,6 +1,6 @@
 [project]
 name = "nextcloud-mcp-server"
-version = "0.46.1"
+version = "0.45.0"
 description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
 authors = [
    {name = "Chris Coutinho", email = "chris@coutinho.io"}
@@ -1936,7 +1936,7 @@ wheels = [

 [[package]]
 name = "nextcloud-mcp-server"
-version = "0.46.1"
+version = "0.45.0"
 source = { editable = "." }
 dependencies = [
    { name = "aiosqlite" },