Files
nextcloud-mcp-server/nextcloud_mcp_server/vector/qdrant_client.py
T
Chris Coutinho 6fe5596c13 feat: Implement BM25 hybrid search with native Qdrant RRF fusion
Replace custom keyword/fuzzy search algorithms with industry-standard BM25
sparse vectors, combined with dense semantic vectors using Qdrant's native
Reciprocal Rank Fusion (RRF). This consolidates search architecture and
improves relevance for both semantic and keyword queries.

Key changes:
- Add fastembed dependency for BM25 sparse vector generation
- Update Qdrant collection schema to support named vectors (dense + sparse)
- Create BM25SparseEmbeddingProvider using FastEmbed's Qdrant/bm25 model
- Implement BM25HybridSearchAlgorithm with native Qdrant RRF prefetch
- Update document processor to generate both dense and sparse embeddings
- Simplify nc_semantic_search() tool to use BM25 hybrid only
- Remove legacy keyword.py, fuzzy.py, and custom hybrid.py (736 lines)
- Update ADR-014 with implementation notes and test results

Benefits:
- Consolidated architecture (single Qdrant database)
- Native database-level RRF fusion (more efficient)
- Industry-standard BM25 (replaces brittle custom keyword search)
- Better relevance across semantic and keyword queries
- Simplified codebase (-285 net lines)

Tests: All 125 tests passing (118 unit, 7 integration)

Implements ADR-014: Replace Custom Keyword Search with BM25 Hybrid Search

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 06:59:44 +01:00

144 lines
5.8 KiB
Python

"""Qdrant client wrapper."""
import asyncio
import logging

from qdrant_client import AsyncQdrantClient, models
from qdrant_client.models import Distance, VectorParams

from nextcloud_mcp_server.config import get_settings
logger = logging.getLogger(__name__)
# Process-wide cached client shared by every caller of get_qdrant_client().
# Stays None until get_qdrant_client() builds the client on first use.
_qdrant_client: AsyncQdrantClient | None = None
# Serializes first-use initialization so that concurrent callers never receive
# a client whose collection has not been created/validated yet.
_qdrant_init_lock = asyncio.Lock()


def _build_qdrant_client(settings) -> AsyncQdrantClient:
    """Construct a client for the Qdrant mode selected in *settings*."""
    if settings.qdrant_url:
        # Network mode
        logger.info(f"Using Qdrant network mode: {settings.qdrant_url}")
        return AsyncQdrantClient(
            url=settings.qdrant_url,
            api_key=settings.qdrant_api_key,
            timeout=30,
        )

    location = settings.qdrant_location
    if location and location != ":memory:":
        # Persistent local mode - the location is a filesystem path
        logger.info(f"Using Qdrant persistent mode: {settings.qdrant_location}")
        return AsyncQdrantClient(path=location)

    if location:
        logger.info("Using Qdrant in-memory mode: :memory:")
    else:
        # Should not happen due to settings validation, but handle gracefully
        logger.warning("No Qdrant mode configured, defaulting to :memory:")
    return AsyncQdrantClient(":memory:")


async def _expected_dense_dimension() -> int:
    """Return the dense vector size produced by the configured embedding service."""
    # Imported here to avoid a circular dependency at module import time.
    from nextcloud_mcp_server.embedding import get_embedding_service

    embedding_service = get_embedding_service()
    # Some providers only learn their dimension by probing the model at runtime.
    if hasattr(embedding_service.provider, "_detect_dimension"):
        await embedding_service.provider._detect_dimension()  # type: ignore[call-non-callable]
    return embedding_service.get_dimension()


def _dense_vector_size(vectors, collection_name: str) -> int:
    """Extract the dense vector size from a collection's vector configuration.

    Handles both named vectors (a dict that must contain ``"dense"``) and the
    legacy single unnamed vector layout.

    Raises:
        ValueError: If the collection has no usable dense vector configuration.
    """
    if isinstance(vectors, dict):
        dense = vectors.get("dense")
        if dense is None:
            raise ValueError(
                f"Collection '{collection_name}' has no 'dense' named vector "
                f"(found: {sorted(vectors)}). Delete the collection so it can be "
                f"recreated with the dense + sparse schema."
            )
        return dense.size
    if vectors is None:
        raise ValueError(
            f"Collection '{collection_name}' has no dense vector configuration. "
            f"Delete the collection so it can be recreated with the dense + "
            f"sparse schema."
        )
    return vectors.size


async def _ensure_collection(client: AsyncQdrantClient, settings) -> None:
    """Validate the configured collection, creating it when it does not exist.

    Raises:
        ValueError: If an existing collection's dense dimension does not match
            the embedding model, or it has no dense vector configuration.
    """
    # Collection name is produced by the settings object.
    collection_name = settings.get_collection_name()
    expected_dimension = await _expected_dense_dimension()

    logger.debug(f"Checking if collection '{collection_name}' exists...")
    listing = await client.get_collections()
    exists = any(c.name == collection_name for c in listing.collections)

    if not exists:
        logger.info(
            f"Collection '{collection_name}' not found, creating with "
            f"dimension={expected_dimension}, model={settings.ollama_embedding_model}..."
        )
        await client.create_collection(
            collection_name=collection_name,
            vectors_config={
                "dense": VectorParams(
                    size=expected_dimension,
                    distance=Distance.COSINE,
                ),
            },
            sparse_vectors_config={
                "sparse": models.SparseVectorParams(
                    index=models.SparseIndexParams(
                        on_disk=False,
                    )
                ),
            },
        )
        logger.info(
            f"Created Qdrant collection: {collection_name}\n"
            f"  Dense vector dimension: {expected_dimension}\n"
            f"  Dense embedding model: {settings.ollama_embedding_model}\n"
            f"  Sparse vectors: BM25 (for hybrid search)\n"
            f"  Distance: COSINE\n"
            f"Background sync will index all documents with dense + sparse vectors."
        )
        return

    logger.debug(f"Collection '{collection_name}' found, validating dimensions...")
    collection_info = await client.get_collection(collection_name)
    actual_dimension = _dense_vector_size(
        collection_info.config.params.vectors, collection_name
    )
    if actual_dimension != expected_dimension:
        raise ValueError(
            f"Dimension mismatch for collection '{collection_name}':\n"
            f"  Expected: {expected_dimension} (from embedding model '{settings.ollama_embedding_model}')\n"
            f"  Found: {actual_dimension}\n"
            f"This usually means you changed the embedding model.\n"
            f"Solutions:\n"
            f"  1. Delete the old collection: Collection will be recreated with new dimensions\n"
            f"  2. Set QDRANT_COLLECTION to use a different collection name\n"
            f"  3. Revert OLLAMA_EMBEDDING_MODEL to the original model"
        )
    logger.info(
        f"Using existing Qdrant collection: {collection_name} "
        f"(dimension={actual_dimension}, model={settings.ollama_embedding_model})"
    )


async def get_qdrant_client() -> AsyncQdrantClient:
    """
    Get singleton Qdrant client instance.

    Automatically creates the collection on first use if it doesn't exist,
    otherwise validates that its dense vector dimension matches the embedding
    model. The client is cached only after that setup succeeds, so a failed
    first call is retried in full on the next call instead of handing out an
    unvalidated client.

    Supports three Qdrant modes:
    - Network mode: QDRANT_URL set (e.g., http://qdrant:6333)
    - In-memory mode: QDRANT_LOCATION=:memory: (default if nothing configured)
    - Persistent local mode: QDRANT_LOCATION=/path/to/data

    Returns:
        Configured AsyncQdrantClient instance

    Raises:
        ValueError: If an existing collection's dense vector configuration is
            missing or its dimension does not match the embedding model
        Exception: If Qdrant connection fails or collection creation fails
    """
    global _qdrant_client

    # Fast path: already initialized, no locking required.
    if _qdrant_client is not None:
        return _qdrant_client

    async with _qdrant_init_lock:
        # Re-check: another coroutine may have finished setup while we waited.
        if _qdrant_client is not None:
            return _qdrant_client

        settings = get_settings()
        client = _build_qdrant_client(settings)
        ready = False
        try:
            await _ensure_collection(client, settings)
            ready = True
        finally:
            if not ready:
                # Release the failed client (persistent mode holds a lock on the
                # storage path) without masking the original error.
                try:
                    await client.close()
                except Exception:
                    logger.warning(
                        "Failed to close Qdrant client after setup error",
                        exc_info=True,
                    )

        # Publish only a fully initialized client.
        _qdrant_client = client
        return client