Compare commits

...

5 Commits

Author SHA1 Message Date
smithery-ai[bot] 63c65f48bf Update README 2025-11-22 18:56:34 +00:00
github-actions[bot] 57db18c6a3 bump: version 0.46.0 → 0.46.1 2025-11-22 18:54:11 +00:00
Chris Coutinho ea79e94842 Merge pull request #343 from cbcoutinho/fix/vector-viz-search
perf: Optimize vector viz search performance
2025-11-22 19:53:40 +01:00
Chris Coutinho b0612cfa0f perf: Optimize vector viz search performance
- Replace sequential Qdrant scroll calls with batch retrieve
  (50 HTTP requests → 1 request, ~50x faster vector fetch)

- Add point_id to SearchResult to enable batch retrieval by Qdrant point ID

- Reuse query embedding from search algorithm in viz_routes
  (eliminates redundant embedding call, saves ~30ms)

- Make BM25 encode() async with thread pool to avoid blocking event loop
  (~4.4s was blocking, now properly async)

- Run PCA computation in thread pool to avoid blocking event loop
  (~1.2s was blocking, now properly async)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-22 19:47:43 +01:00
github-actions[bot] 4e61d73da5 bump: version 0.45.0 → 0.46.0 2025-11-22 18:40:24 +00:00
10 changed files with 106 additions and 64 deletions
+19
View File
@@ -1,3 +1,22 @@
## v0.46.1 (2025-11-22)
### Perf
- Optimize vector viz search performance
## v0.46.0 (2025-11-22)
### Feat
- Add Smithery CLI deployment support
- Implement ADR-016 Smithery stateless deployment mode
### Fix
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
- **smithery**: Use container runtime pattern for config discovery
- Add Smithery lifespan and auth mode detection
## v0.45.0 (2025-11-22)
### Feat
+3 -1
View File
@@ -1,11 +1,12 @@
```markdown
<p align="center">
<img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128">
</p>
# Nextcloud MCP Server
[![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
[![smithery badge](https://smithery.ai/badge/@cbcoutinho/nextcloud-mcp-server)](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
[![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
**A production-ready MCP server that connects AI assistants to your Nextcloud instance.**
@@ -223,3 +224,4 @@ This project is licensed under the AGPL-3.0 License. See [LICENSE](./LICENSE) fo
- [Model Context Protocol](https://github.com/modelcontextprotocol)
- [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
- [Nextcloud](https://nextcloud.com/)
```
+2 -2
View File
@@ -2,8 +2,8 @@ apiVersion: v2
name: nextcloud-mcp-server
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
type: application
version: 0.45.0
appVersion: "0.45.0"
version: 0.46.1
appVersion: "0.46.1"
keywords:
- nextcloud
- mcp
+44 -57
View File
@@ -218,71 +218,41 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
}
)
# Fetch vectors for specific matching chunks from Qdrant
# Fetch vectors for specific matching chunks from Qdrant using batch retrieve
vector_fetch_start = time.perf_counter()
qdrant_client = await get_qdrant_client()
# Build filters for each specific chunk
from qdrant_client.models import FieldCondition, Filter, MatchValue
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
# Fetch vectors in batches by filtering on chunk-specific fields
for result in search_results:
chunk_start = result.chunk_start_offset
chunk_end = result.chunk_end_offset
# Collect point IDs from search results for batch retrieval
# point_id is the Qdrant internal ID returned by search algorithms
point_ids = [r.point_id for r in search_results if r.point_id]
# Build filter for this specific chunk
must_conditions = [
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
FieldCondition(
key="doc_id",
match=MatchValue(value=result.id),
),
FieldCondition(
key="user_id",
match=MatchValue(value=username),
),
]
# Add chunk position filters if available
if chunk_start is not None:
must_conditions.append(
FieldCondition(
key="chunk_start_offset",
match=MatchValue(value=chunk_start),
)
)
if chunk_end is not None:
must_conditions.append(
FieldCondition(
key="chunk_end_offset",
match=MatchValue(value=chunk_end),
)
)
# Fetch this specific chunk vector
points_response = await qdrant_client.scroll(
if point_ids:
# Single batch retrieve call instead of N sequential scroll calls
# This is ~50x faster for 50 results (1 HTTP request vs 50)
points_response = await qdrant_client.retrieve(
collection_name=settings.get_collection_name(),
scroll_filter=Filter(must=must_conditions),
limit=1, # Only need the first match
ids=point_ids,
with_vectors=["dense"],
with_payload=False,
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
)
points = points_response[0]
if points:
# Extract dense vector
point = points[0]
# Build chunk_vectors_map from batch response
for point in points_response:
if point.vector is not None:
# If named vectors (dict), extract "dense"
# Extract dense vector (handle both named and unnamed vectors)
if isinstance(point.vector, dict):
vector = point.vector.get("dense")
else:
vector = point.vector
chunk_key = (result.id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
if vector is not None and point.payload:
doc_id = point.payload.get("doc_id")
chunk_start = point.payload.get("chunk_start_offset")
chunk_end = point.payload.get("chunk_end_offset")
chunk_key = (doc_id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
vector_fetch_duration = time.perf_counter() - vector_fetch_start
@@ -341,16 +311,23 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
chunk_vectors = np.array(chunk_vectors)
# Generate query embedding for visualization
# Reuse query embedding from search algorithm (avoids redundant embedding call)
query_embed_start = time.perf_counter()
from nextcloud_mcp_server.embedding.service import get_embedding_service
if search_algo.query_embedding is not None:
query_embedding = search_algo.query_embedding
logger.info(
f"Reusing query embedding from search algorithm "
f"(dimension={len(query_embedding)})"
)
else:
# Fallback: generate embedding if not available from search
from nextcloud_mcp_server.embedding.service import get_embedding_service
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
query_embed_duration = time.perf_counter() - query_embed_start
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
# Combine query vector with chunk vectors for PCA
# Query will be the last point in the array
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
@@ -380,9 +357,19 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
)
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
# Run in thread pool to avoid blocking the event loop (CPU-bound)
pca_start = time.perf_counter()
pca = PCA(n_components=3)
coords_3d = pca.fit_transform(all_vectors_normalized)
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
pca = PCA(n_components=3)
coords = pca.fit_transform(vectors)
return coords, pca
import anyio
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
lambda: _compute_pca(all_vectors_normalized)
)
pca_duration = time.perf_counter() - pca_start
# After fit, these attributes are guaranteed to be set
@@ -37,7 +37,9 @@ class BM25SparseEmbeddingProvider:
def encode(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text.
Generate BM25 sparse embedding for a single text (synchronous).
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
Args:
text: Input text to encode
@@ -53,6 +55,23 @@ class BM25SparseEmbeddingProvider:
"values": sparse_embedding.values.tolist(),
}
async def encode_async(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text (async).
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
Args:
text: Input text to encode
Returns:
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
"""
import anyio
# Run CPU-bound BM25 encoding in thread pool
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
"""
Generate BM25 sparse embeddings for multiple texts (batched).
@@ -140,6 +140,7 @@ class SearchResult:
page_number: Page number for PDF documents (None for other doc types)
chunk_index: Zero-based index of this chunk in the document
total_chunks: Total number of chunks in the document
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
"""
id: int
@@ -153,6 +154,7 @@ class SearchResult:
page_number: int | None = None
chunk_index: int = 0
total_chunks: int = 1
point_id: str | None = None
def __post_init__(self):
"""Validate score is non-negative.
@@ -172,8 +174,15 @@ class SearchAlgorithm(ABC):
All search algorithms must implement the search() method with consistent
interface, allowing them to be used interchangeably.
Attributes:
query_embedding: The query embedding generated during the last search.
Available after search() completes for algorithms that use embeddings.
Can be reused by callers to avoid redundant embedding generation.
"""
query_embedding: list[float] | None = None
@abstractmethod
async def search(
self,
+4 -1
View File
@@ -101,11 +101,13 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
# Generate dense embedding for semantic search
embedding_service = get_embedding_service()
dense_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = dense_embedding
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
# Generate sparse embedding for BM25 keyword search
bm25_service = get_bm25_service()
sparse_embedding = bm25_service.encode(query)
sparse_embedding = await bm25_service.encode_async(query)
logger.debug(
f"Generated sparse embedding "
f"({len(sparse_embedding['indices'])} non-zero terms)"
@@ -218,6 +220,7 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
)
)
+3
View File
@@ -78,6 +78,8 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
# Generate embedding for query
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = query_embedding
logger.debug(
f"Generated embedding for query (dimension={len(query_embedding)})"
)
@@ -164,6 +166,7 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
)
)
+1 -1
View File
@@ -1,6 +1,6 @@
[project]
name = "nextcloud-mcp-server"
version = "0.45.0"
version = "0.46.1"
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
authors = [
{name = "Chris Coutinho", email = "chris@coutinho.io"}
Generated
+1 -1
View File
@@ -1936,7 +1936,7 @@ wheels = [
[[package]]
name = "nextcloud-mcp-server"
version = "0.45.0"
version = "0.46.1"
source = { editable = "." }
dependencies = [
{ name = "aiosqlite" },