Compare commits

..

1 Commit

Author SHA1 Message Date
smithery-ai[bot] d9f458a8b7 Update README 2025-11-22 18:40:14 +00:00
9 changed files with 63 additions and 103 deletions
-19
View File
@@ -1,22 +1,3 @@
## v0.46.1 (2025-11-22)
### Perf
- Optimize vector viz search performance
## v0.46.0 (2025-11-22)
### Feat
- Add Smithery CLI deployment support
- Implement ADR-016 Smithery stateless deployment mode
### Fix
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
- **smithery**: Use container runtime pattern for config discovery
- Add Smithery lifespan and auth mode detection
## v0.45.0 (2025-11-22) ## v0.45.0 (2025-11-22)
### Feat ### Feat
+2 -2
View File
@@ -2,8 +2,8 @@ apiVersion: v2
name: nextcloud-mcp-server name: nextcloud-mcp-server
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
type: application type: application
version: 0.46.1 version: 0.45.0
appVersion: "0.46.1" appVersion: "0.45.0"
keywords: keywords:
- nextcloud - nextcloud
- mcp - mcp
+57 -44
View File
@@ -218,41 +218,71 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
} }
) )
# Fetch vectors for specific matching chunks from Qdrant using batch retrieve # Fetch vectors for specific matching chunks from Qdrant
vector_fetch_start = time.perf_counter() vector_fetch_start = time.perf_counter()
qdrant_client = await get_qdrant_client() qdrant_client = await get_qdrant_client()
# Build filters for each specific chunk
from qdrant_client.models import FieldCondition, Filter, MatchValue
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
# Collect point IDs from search results for batch retrieval # Fetch vectors in batches by filtering on chunk-specific fields
# point_id is the Qdrant internal ID returned by search algorithms for result in search_results:
point_ids = [r.point_id for r in search_results if r.point_id] chunk_start = result.chunk_start_offset
chunk_end = result.chunk_end_offset
if point_ids: # Build filter for this specific chunk
# Single batch retrieve call instead of N sequential scroll calls must_conditions = [
# This is ~50x faster for 50 results (1 HTTP request vs 50) get_placeholder_filter(), # Always exclude placeholders from user-facing queries
points_response = await qdrant_client.retrieve( FieldCondition(
key="doc_id",
match=MatchValue(value=result.id),
),
FieldCondition(
key="user_id",
match=MatchValue(value=username),
),
]
# Add chunk position filters if available
if chunk_start is not None:
must_conditions.append(
FieldCondition(
key="chunk_start_offset",
match=MatchValue(value=chunk_start),
)
)
if chunk_end is not None:
must_conditions.append(
FieldCondition(
key="chunk_end_offset",
match=MatchValue(value=chunk_end),
)
)
# Fetch this specific chunk vector
points_response = await qdrant_client.scroll(
collection_name=settings.get_collection_name(), collection_name=settings.get_collection_name(),
ids=point_ids, scroll_filter=Filter(must=must_conditions),
limit=1, # Only need the first match
with_vectors=["dense"], with_vectors=["dense"],
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"], with_payload=False,
) )
# Build chunk_vectors_map from batch response points = points_response[0]
for point in points_response: if points:
# Extract dense vector
point = points[0]
if point.vector is not None: if point.vector is not None:
# Extract dense vector (handle both named and unnamed vectors) # If named vectors (dict), extract "dense"
if isinstance(point.vector, dict): if isinstance(point.vector, dict):
vector = point.vector.get("dense") vector = point.vector.get("dense")
else: else:
vector = point.vector vector = point.vector
if vector is not None and point.payload: chunk_key = (result.id, chunk_start, chunk_end)
doc_id = point.payload.get("doc_id") chunk_vectors_map[chunk_key] = vector
chunk_start = point.payload.get("chunk_start_offset")
chunk_end = point.payload.get("chunk_end_offset")
chunk_key = (doc_id, chunk_start, chunk_end)
chunk_vectors_map[chunk_key] = vector
vector_fetch_duration = time.perf_counter() - vector_fetch_start vector_fetch_duration = time.perf_counter() - vector_fetch_start
@@ -311,23 +341,16 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
chunk_vectors = np.array(chunk_vectors) chunk_vectors = np.array(chunk_vectors)
# Reuse query embedding from search algorithm (avoids redundant embedding call) # Generate query embedding for visualization
query_embed_start = time.perf_counter() query_embed_start = time.perf_counter()
if search_algo.query_embedding is not None: from nextcloud_mcp_server.embedding.service import get_embedding_service
query_embedding = search_algo.query_embedding
logger.info(
f"Reusing query embedding from search algorithm "
f"(dimension={len(query_embedding)})"
)
else:
# Fallback: generate embedding if not available from search
from nextcloud_mcp_server.embedding.service import get_embedding_service
embedding_service = get_embedding_service() embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query) query_embedding = await embedding_service.embed(query)
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
query_embed_duration = time.perf_counter() - query_embed_start query_embed_duration = time.perf_counter() - query_embed_start
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
# Combine query vector with chunk vectors for PCA # Combine query vector with chunk vectors for PCA
# Query will be the last point in the array # Query will be the last point in the array
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])]) all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
@@ -357,19 +380,9 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
) )
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors # Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
# Run in thread pool to avoid blocking the event loop (CPU-bound)
pca_start = time.perf_counter() pca_start = time.perf_counter()
pca = PCA(n_components=3)
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]: coords_3d = pca.fit_transform(all_vectors_normalized)
pca = PCA(n_components=3)
coords = pca.fit_transform(vectors)
return coords, pca
import anyio
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
lambda: _compute_pca(all_vectors_normalized)
)
pca_duration = time.perf_counter() - pca_start pca_duration = time.perf_counter() - pca_start
# After fit, these attributes are guaranteed to be set # After fit, these attributes are guaranteed to be set
@@ -37,9 +37,7 @@ class BM25SparseEmbeddingProvider:
def encode(self, text: str) -> dict[str, Any]: def encode(self, text: str) -> dict[str, Any]:
""" """
Generate BM25 sparse embedding for a single text (synchronous). Generate BM25 sparse embedding for a single text.
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
Args: Args:
text: Input text to encode text: Input text to encode
@@ -55,23 +53,6 @@ class BM25SparseEmbeddingProvider:
"values": sparse_embedding.values.tolist(), "values": sparse_embedding.values.tolist(),
} }
async def encode_async(self, text: str) -> dict[str, Any]:
"""
Generate BM25 sparse embedding for a single text (async).
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
Args:
text: Input text to encode
Returns:
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
"""
import anyio
# Run CPU-bound BM25 encoding in thread pool
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]: async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
""" """
Generate BM25 sparse embeddings for multiple texts (batched). Generate BM25 sparse embeddings for multiple texts (batched).
@@ -140,7 +140,6 @@ class SearchResult:
page_number: Page number for PDF documents (None for other doc types) page_number: Page number for PDF documents (None for other doc types)
chunk_index: Zero-based index of this chunk in the document chunk_index: Zero-based index of this chunk in the document
total_chunks: Total number of chunks in the document total_chunks: Total number of chunks in the document
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
""" """
id: int id: int
@@ -154,7 +153,6 @@ class SearchResult:
page_number: int | None = None page_number: int | None = None
chunk_index: int = 0 chunk_index: int = 0
total_chunks: int = 1 total_chunks: int = 1
point_id: str | None = None
def __post_init__(self): def __post_init__(self):
"""Validate score is non-negative. """Validate score is non-negative.
@@ -174,15 +172,8 @@ class SearchAlgorithm(ABC):
All search algorithms must implement the search() method with consistent All search algorithms must implement the search() method with consistent
interface, allowing them to be used interchangeably. interface, allowing them to be used interchangeably.
Attributes:
query_embedding: The query embedding generated during the last search.
Available after search() completes for algorithms that use embeddings.
Can be reused by callers to avoid redundant embedding generation.
""" """
query_embedding: list[float] | None = None
@abstractmethod @abstractmethod
async def search( async def search(
self, self,
+1 -4
View File
@@ -101,13 +101,11 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
# Generate dense embedding for semantic search # Generate dense embedding for semantic search
embedding_service = get_embedding_service() embedding_service = get_embedding_service()
dense_embedding = await embedding_service.embed(query) dense_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = dense_embedding
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})") logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
# Generate sparse embedding for BM25 keyword search # Generate sparse embedding for BM25 keyword search
bm25_service = get_bm25_service() bm25_service = get_bm25_service()
sparse_embedding = await bm25_service.encode_async(query) sparse_embedding = bm25_service.encode(query)
logger.debug( logger.debug(
f"Generated sparse embedding " f"Generated sparse embedding "
f"({len(sparse_embedding['indices'])} non-zero terms)" f"({len(sparse_embedding['indices'])} non-zero terms)"
@@ -220,7 +218,6 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"), page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0), chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1), total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
) )
) )
-3
View File
@@ -78,8 +78,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
# Generate embedding for query # Generate embedding for query
embedding_service = get_embedding_service() embedding_service = get_embedding_service()
query_embedding = await embedding_service.embed(query) query_embedding = await embedding_service.embed(query)
# Store for reuse by callers (e.g., viz_routes PCA visualization)
self.query_embedding = query_embedding
logger.debug( logger.debug(
f"Generated embedding for query (dimension={len(query_embedding)})" f"Generated embedding for query (dimension={len(query_embedding)})"
) )
@@ -166,7 +164,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
page_number=result.payload.get("page_number"), page_number=result.payload.get("page_number"),
chunk_index=result.payload.get("chunk_index", 0), chunk_index=result.payload.get("chunk_index", 0),
total_chunks=result.payload.get("total_chunks", 1), total_chunks=result.payload.get("total_chunks", 1),
point_id=str(result.id), # Qdrant point ID for batch retrieval
) )
) )
+1 -1
View File
@@ -1,6 +1,6 @@
[project] [project]
name = "nextcloud-mcp-server" name = "nextcloud-mcp-server"
version = "0.46.1" version = "0.45.0"
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data" description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
authors = [ authors = [
{name = "Chris Coutinho", email = "chris@coutinho.io"} {name = "Chris Coutinho", email = "chris@coutinho.io"}
Generated
+1 -1
View File
@@ -1936,7 +1936,7 @@ wheels = [
[[package]] [[package]]
name = "nextcloud-mcp-server" name = "nextcloud-mcp-server"
version = "0.46.1" version = "0.45.0"
source = { editable = "." } source = { editable = "." }
dependencies = [ dependencies = [
{ name = "aiosqlite" }, { name = "aiosqlite" },