Compare commits

...

5 Commits

Author SHA1 Message Date
smithery-ai[bot] 63c65f48bf Update README 2025-11-22 18:56:34 +00:00
github-actions[bot] 57db18c6a3 bump: version 0.46.0 → 0.46.1 2025-11-22 18:54:11 +00:00
Chris Coutinho ea79e94842 Merge pull request #343 from cbcoutinho/fix/vector-viz-search
perf: Optimize vector viz search performance
2025-11-22 19:53:40 +01:00
Chris Coutinho b0612cfa0f perf: Optimize vector viz search performance
- Replace sequential Qdrant scroll calls with batch retrieve
  (50 HTTP requests → 1 request, ~50x faster vector fetch)

- Add point_id to SearchResult to enable batch retrieval by Qdrant point ID

- Reuse query embedding from search algorithm in viz_routes
  (eliminates redundant embedding call, saves ~30ms)

- Add async BM25 encode_async() that runs encode() in a thread pool to avoid blocking the event loop
  (~4.4s was blocking, now properly async)

- Run PCA computation in thread pool to avoid blocking event loop
  (~1.2s was blocking, now properly async)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 19:47:43 +01:00
github-actions[bot] 4e61d73da5 bump: version 0.45.0 → 0.46.0 2025-11-22 18:40:24 +00:00
10 changed files with 106 additions and 64 deletions
+19
View File
@@ -1,3 +1,22 @@
## v0.46.1 (2025-11-22)
### Perf
- Optimize vector viz search performance
## v0.46.0 (2025-11-22)
### Feat
- Add Smithery CLI deployment support
- Implement ADR-016 Smithery stateless deployment mode
### Fix
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
- **smithery**: Use container runtime pattern for config discovery
- Add Smithery lifespan and auth mode detection
## v0.45.0 (2025-11-22) ## v0.45.0 (2025-11-22)
### Feat ### Feat
+3 -1
View File
@@ -1,11 +1,12 @@
```markdown
<p align="center"> <p align="center">
<img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128"> <img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128">
</p> </p>
# Nextcloud MCP Server # Nextcloud MCP Server
[![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
[![smithery badge](https://smithery.ai/badge/@cbcoutinho/nextcloud-mcp-server)](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server) [![smithery badge](https://smithery.ai/badge/@cbcoutinho/nextcloud-mcp-server)](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
[![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
**A production-ready MCP server that connects AI assistants to your Nextcloud instance.** **A production-ready MCP server that connects AI assistants to your Nextcloud instance.**
@@ -223,3 +224,4 @@ This project is licensed under the AGPL-3.0 License. See [LICENSE](./LICENSE) fo
- [Model Context Protocol](https://github.com/modelcontextprotocol) - [Model Context Protocol](https://github.com/modelcontextprotocol)
- [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk) - [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
- [Nextcloud](https://nextcloud.com/) - [Nextcloud](https://nextcloud.com/)
```
+2 -2
View File
@@ -2,8 +2,8 @@ apiVersion: v2
name: nextcloud-mcp-server name: nextcloud-mcp-server
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
type: application type: application
version: 0.45.0 version: 0.46.1
appVersion: "0.45.0" appVersion: "0.46.1"
keywords: keywords:
- nextcloud - nextcloud
- mcp - mcp
+44 -57
View File
@@ -218,71 +218,41 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
} }
) )
# Fetch vectors for specific matching chunks from Qdrant # Fetch vectors for specific matching chunks from Qdrant using batch retrieve
vector_fetch_start = time.perf_counter() vector_fetch_start = time.perf_counter()
qdrant_client = await get_qdrant_client() qdrant_client = await get_qdrant_client()
# Build filters for each specific chunk
from qdrant_client.models import FieldCondition, Filter, MatchValue
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
# Fetch vectors in batches by filtering on chunk-specific fields # Collect point IDs from search results for batch retrieval
for result in search_results: # point_id is the Qdrant internal ID returned by search algorithms
chunk_start = result.chunk_start_offset point_ids = [r.point_id for r in search_results if r.point_id]
chunk_end = result.chunk_end_offset
# Build filter for this specific chunk if point_ids:
must_conditions = [ # Single batch retrieve call instead of N sequential scroll calls
get_placeholder_filter(), # Always exclude placeholders from user-facing queries # This is ~50x faster for 50 results (1 HTTP request vs 50)
FieldCondition( points_response = await qdrant_client.retrieve(
key="doc_id",
match=MatchValue(value=result.id),
),
FieldCondition(
key="user_id",
match=MatchValue(value=username),
),
]
# Add chunk position filters if available
if chunk_start is not None:
must_conditions.append(
FieldCondition(
key="chunk_start_offset",
match=MatchValue(value=chunk_start),
)
)
if chunk_end is not None:
must_conditions.append(
FieldCondition(
key="chunk_end_offset",
match=MatchValue(value=chunk_end),
)
)
# Fetch this specific chunk vector
points_response = await qdrant_client.scroll(
collection_name=settings.get_collection_name(), collection_name=settings.get_collection_name(),
scroll_filter=Filter(must=must_conditions), ids=point_ids,
limit=1, # Only need the first match
with_vectors=["dense"], with_vectors=["dense"],
with_payload=False, with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
) )
points = points_response[0] # Build chunk_vectors_map from batch response
if points: for point in points_response:
# Extract dense vector
point = points[0]
if point.vector is not None: if point.vector is not None:
# If named vectors (dict), extract "dense" # Extract dense vector (handle both named and unnamed vectors)
if isinstance(point.vector, dict): if isinstance(point.vector, dict):
vector = point.vector.get("dense") vector = point.vector.get("dense")
else: else:
vector = point.vector vector = point.vector
chunk_key = (result.id, chunk_start, chunk_end) if vector is not None and point.payload:
chunk_vectors_map[chunk_key] = vector doc_id = point.payload.get("doc_id")
chunk_start = point.payload.get("chunk_start_offset")
chunk_end = point.payload.get("chunk_end_offset")
chunk_key = (doc_id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
vector_fetch_duration = time.perf_counter() - vector_fetch_start vector_fetch_duration = time.perf_counter() - vector_fetch_start
@@ -341,16 +311,23 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
chunk_vectors = np.array(chunk_vectors) chunk_vectors = np.array(chunk_vectors)
# Generate query embedding for visualization # Reuse query embedding from search algorithm (avoids redundant embedding call)
query_embed_start = time.perf_counter() query_embed_start = time.perf_counter()
from nextcloud_mcp_server.embedding.service import get_embedding_service if search_algo.query_embedding is not None:
query_embedding = search_algo.query_embedding
logger.info(
f"Reusing query embedding from search algorithm "
f"(dimension={len(query_embedding)})"
)
else:
# Fallback: generate embedding if not available from search
from nextcloud_mcp_server.embedding.service import get_embedding_service
embedding_service = get_embedding_service() embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query) query_embedding = await embedding_service.embed(query)
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
query_embed_duration = time.perf_counter() - query_embed_start query_embed_duration = time.perf_counter() - query_embed_start
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
# Combine query vector with chunk vectors for PCA # Combine query vector with chunk vectors for PCA
# Query will be the last point in the array # Query will be the last point in the array
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])]) all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
@@ -380,9 +357,19 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
) )
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors # Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
# Run in thread pool to avoid blocking the event loop (CPU-bound)
pca_start = time.perf_counter() pca_start = time.perf_counter()
pca = PCA(n_components=3)
coords_3d = pca.fit_transform(all_vectors_normalized) def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
pca = PCA(n_components=3)
coords = pca.fit_transform(vectors)
return coords, pca
import anyio
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
lambda: _compute_pca(all_vectors_normalized)
)
pca_duration = time.perf_counter() - pca_start pca_duration = time.perf_counter() - pca_start
# After fit, these attributes are guaranteed to be set # After fit, these attributes are guaranteed to be set
@@ -37,7 +37,9 @@ class BM25SparseEmbeddingProvider:
def encode(self, text: str) -> dict[str, Any]: def encode(self, text: str) -> dict[str, Any]:
""" """
Generate BM25 sparse embedding for a single text. Generate BM25 sparse embedding for a single text (synchronous).
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
Args: Args:
text: Input text to encode text: Input text to encode
@@ -53,6 +55,23 @@ class BM25SparseEmbeddingProvider:
"values": sparse_embedding.values.tolist(), "values": sparse_embedding.values.tolist(),
} }
async def encode_async(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text (async).
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
Args:
text: Input text to encode
Returns:
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
"""
import anyio
# Run CPU-bound BM25 encoding in thread pool
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]: async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
""" """
Generate BM25 sparse embeddings for multiple texts (batched). Generate BM25 sparse embeddings for multiple texts (batched).
@@ -140,6 +140,7 @@ class SearchResult:
page_number: Page number for PDF documents (None for other doc types) page_number: Page number for PDF documents (None for other doc types)
chunk_index: Zero-based index of this chunk in the document chunk_index: Zero-based index of this chunk in the document
total_chunks: Total number of chunks in the document total_chunks: Total number of chunks in the document
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
""" """
id: int id: int
@@ -153,6 +154,7 @@ class SearchResult:
page_number: int | None = None page_number: int | None = None
chunk_index: int = 0 chunk_index: int = 0
total_chunks: int = 1 total_chunks: int = 1
point_id: str | None = None
def __post_init__(self): def __post_init__(self):
"""Validate score is non-negative. """Validate score is non-negative.
@@ -172,8 +174,15 @@ class SearchAlgorithm(ABC):
All search algorithms must implement the search() method with consistent All search algorithms must implement the search() method with consistent
interface, allowing them to be used interchangeably. interface, allowing them to be used interchangeably.
Attributes:
query_embedding: The query embedding generated during the last search.
Available after search() completes for algorithms that use embeddings.
Can be reused by callers to avoid redundant embedding generation.
""" """
query_embedding: list[float] | None = None
@abstractmethod @abstractmethod
async def search( async def search(
self, self,
+4 -1
View File
@@ -101,11 +101,13 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
# Generate dense embedding for semantic search # Generate dense embedding for semantic search
embedding_service = get_embedding_service() embedding_service = get_embedding_service()
dense_embedding = await embedding_service.embed(query) dense_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = dense_embedding
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})") logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
# Generate sparse embedding for BM25 keyword search # Generate sparse embedding for BM25 keyword search
bm25_service = get_bm25_service() bm25_service = get_bm25_service()
sparse_embedding = bm25_service.encode(query) sparse_embedding = await bm25_service.encode_async(query)
logger.debug( logger.debug(
f"Generated sparse embedding " f"Generated sparse embedding "
f"({len(sparse_embedding['indices'])} non-zero terms)" f"({len(sparse_embedding['indices'])} non-zero terms)"
@@ -218,6 +220,7 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"), page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0), chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1), total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
) )
) )
+3
View File
@@ -78,6 +78,8 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
# Generate embedding for query # Generate embedding for query
embedding_service = get_embedding_service() embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query) query_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = query_embedding
logger.debug( logger.debug(
f"Generated embedding for query (dimension={len(query_embedding)})" f"Generated embedding for query (dimension={len(query_embedding)})"
) )
@@ -164,6 +166,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"), page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0), chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1), total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
) )
) )
+1 -1
View File
@@ -1,6 +1,6 @@
[project] [project]
name = "nextcloud-mcp-server" name = "nextcloud-mcp-server"
version = "0.45.0" version = "0.46.1"
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data" description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
authors = [ authors = [
{name = "Chris Coutinho", email = "chris@coutinho.io"} {name = "Chris Coutinho", email = "chris@coutinho.io"}
Generated
+1 -1
View File
@@ -1936,7 +1936,7 @@ wheels = [
[[package]] [[package]]
name = "nextcloud-mcp-server" name = "nextcloud-mcp-server"
version = "0.45.0" version = "0.46.1"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "aiosqlite" }, { name = "aiosqlite" },