Compare commits

..

1 Commit

Author SHA1 Message Date
smithery-ai[bot] d9f458a8b7 Update README 2025-11-22 18:40:14 +00:00
9 changed files with 63 additions and 103 deletions
-19
View File
@@ -1,22 +1,3 @@
## v0.46.1 (2025-11-22)
### Perf
- Optimize vector viz search performance
## v0.46.0 (2025-11-22)
### Feat
- Add Smithery CLI deployment support
- Implement ADR-016 Smithery stateless deployment mode
### Fix
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
- **smithery**: Use container runtime pattern for config discovery
- Add Smithery lifespan and auth mode detection
## v0.45.0 (2025-11-22)
### Feat
+2 -2
View File
@@ -2,8 +2,8 @@ apiVersion: v2
name: nextcloud-mcp-server
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
type: application
version: 0.46.1
appVersion: "0.46.1"
version: 0.45.0
appVersion: "0.45.0"
keywords:
- nextcloud
- mcp
+57 -44
View File
@@ -218,41 +218,71 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
}
)
# Fetch vectors for specific matching chunks from Qdrant using batch retrieve
# Fetch vectors for specific matching chunks from Qdrant
vector_fetch_start = time.perf_counter()
qdrant_client = await get_qdrant_client()
# Build filters for each specific chunk
from qdrant_client.models import FieldCondition, Filter, MatchValue
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
# Collect point IDs from search results for batch retrieval
# point_id is the Qdrant internal ID returned by search algorithms
point_ids = [r.point_id for r in search_results if r.point_id]
# Fetch vectors in batches by filtering on chunk-specific fields
for result in search_results:
chunk_start = result.chunk_start_offset
chunk_end = result.chunk_end_offset
if point_ids:
# Single batch retrieve call instead of N sequential scroll calls
# This is ~50x faster for 50 results (1 HTTP request vs 50)
points_response = await qdrant_client.retrieve(
# Build filter for this specific chunk
must_conditions = [
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
FieldCondition(
key="doc_id",
match=MatchValue(value=result.id),
),
FieldCondition(
key="user_id",
match=MatchValue(value=username),
),
]
# Add chunk position filters if available
if chunk_start is not None:
must_conditions.append(
FieldCondition(
key="chunk_start_offset",
match=MatchValue(value=chunk_start),
)
)
if chunk_end is not None:
must_conditions.append(
FieldCondition(
key="chunk_end_offset",
match=MatchValue(value=chunk_end),
)
)
# Fetch this specific chunk vector
points_response = await qdrant_client.scroll(
collection_name=settings.get_collection_name(),
ids=point_ids,
scroll_filter=Filter(must=must_conditions),
limit=1, # Only need the first match
with_vectors=["dense"],
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
with_payload=False,
)
# Build chunk_vectors_map from batch response
for point in points_response:
points = points_response[0]
if points:
# Extract dense vector
point = points[0]
if point.vector is not None:
# Extract dense vector (handle both named and unnamed vectors)
# If named vectors (dict), extract "dense"
if isinstance(point.vector, dict):
vector = point.vector.get("dense")
else:
vector = point.vector
if vector is not None and point.payload:
doc_id = point.payload.get("doc_id")
chunk_start = point.payload.get("chunk_start_offset")
chunk_end = point.payload.get("chunk_end_offset")
chunk_key = (doc_id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
chunk_key = (result.id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
vector_fetch_duration = time.perf_counter() - vector_fetch_start
@@ -311,23 +341,16 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
chunk_vectors = np.array(chunk_vectors)
# Reuse query embedding from search algorithm (avoids redundant embedding call)
# Generate query embedding for visualization
query_embed_start = time.perf_counter()
if search_algo.query_embedding is not None:
query_embedding = search_algo.query_embedding
logger.info(
f"Reusing query embedding from search algorithm "
f"(dimension={len(query_embedding)})"
)
else:
# Fallback: generate embedding if not available from search
from nextcloud_mcp_server.embedding.service import get_embedding_service
from nextcloud_mcp_server.embedding.service import get_embedding_service
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
query_embed_duration = time.perf_counter() - query_embed_start
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
# Combine query vector with chunk vectors for PCA
# Query will be the last point in the array
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
@@ -357,19 +380,9 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
)
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
# Run in thread pool to avoid blocking the event loop (CPU-bound)
pca_start = time.perf_counter()
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
pca = PCA(n_components=3)
coords = pca.fit_transform(vectors)
return coords, pca
import anyio
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
lambda: _compute_pca(all_vectors_normalized)
)
pca = PCA(n_components=3)
coords_3d = pca.fit_transform(all_vectors_normalized)
pca_duration = time.perf_counter() - pca_start
# After fit, these attributes are guaranteed to be set
@@ -37,9 +37,7 @@ class BM25SparseEmbeddingProvider:
def encode(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text (synchronous).
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
Generate BM25 sparse embedding for a single text.
Args:
text: Input text to encode
@@ -55,23 +53,6 @@ class BM25SparseEmbeddingProvider:
"values": sparse_embedding.values.tolist(),
}
async def encode_async(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text (async).
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
Args:
text: Input text to encode
Returns:
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
"""
import anyio
# Run CPU-bound BM25 encoding in thread pool
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
"""
Generate BM25 sparse embeddings for multiple texts (batched).
@@ -140,7 +140,6 @@ class SearchResult:
page_number: Page number for PDF documents (None for other doc types)
chunk_index: Zero-based index of this chunk in the document
total_chunks: Total number of chunks in the document
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
"""
id: int
@@ -154,7 +153,6 @@ class SearchResult:
page_number: int | None = None
chunk_index: int = 0
total_chunks: int = 1
point_id: str | None = None
def __post_init__(self):
"""Validate score is non-negative.
@@ -174,15 +172,8 @@ class SearchAlgorithm(ABC):
All search algorithms must implement the search() method with consistent
interface, allowing them to be used interchangeably.
Attributes:
query_embedding: The query embedding generated during the last search.
Available after search() completes for algorithms that use embeddings.
Can be reused by callers to avoid redundant embedding generation.
"""
query_embedding: list[float] | None = None
@abstractmethod
async def search(
self,
+1 -4
View File
@@ -101,13 +101,11 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
# Generate dense embedding for semantic search
embedding_service = get_embedding_service()
dense_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = dense_embedding
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
# Generate sparse embedding for BM25 keyword search
bm25_service = get_bm25_service()
sparse_embedding = await bm25_service.encode_async(query)
sparse_embedding = bm25_service.encode(query)
logger.debug(
f"Generated sparse embedding "
f"({len(sparse_embedding['indices'])} non-zero terms)"
@@ -220,7 +218,6 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
)
)
-3
View File
@@ -78,8 +78,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
# Generate embedding for query
embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = query_embedding
logger.debug(
f"Generated embedding for query (dimension={len(query_embedding)})"
)
@@ -166,7 +164,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
)
)
+1 -1
View File
@@ -1,6 +1,6 @@
[project]
name = "nextcloud-mcp-server"
version = "0.46.1"
version = "0.45.0"
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
authors = [
{name = "Chris Coutinho", email = "chris@coutinho.io"}
Generated
+1 -1
View File
@@ -1936,7 +1936,7 @@ wheels = [
[[package]]
name = "nextcloud-mcp-server"
version = "0.46.1"
version = "0.45.0"
source = { editable = "." }
dependencies = [
{ name = "aiosqlite" },