fix: add dynamic dimension detection for Ollama embedding models
This fixes dimension mismatch errors when using embedding models with non-standard dimensions (e.g., qwen3-embedding:4b produces 2560-dim vectors instead of the hardcoded 768).

Changes:
- OllamaEmbeddingProvider: Detect dimensions dynamically by generating a test embedding instead of hardcoding to 768
- qdrant_client: Call dimension detection before collection creation
- app.py: Initialize the Qdrant collection before starting background tasks in the streamable-http transport path
- tests: Fix integration tests to properly mock the EmbeddingService wrapper

Fixes dimension mismatch error: "could not broadcast input array from shape (2560,) into shape (768,)"

All integration tests passing (6/6).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -418,6 +418,19 @@ async def app_lifespan_basic(server: FastMCP) -> AsyncIterator[AppContext]:
|
||||
"NEXTCLOUD_USERNAME is required for vector sync in BasicAuth mode"
|
||||
)
|
||||
|
||||
# Initialize Qdrant collection before starting background tasks
|
||||
logger.info("Initializing Qdrant collection...")
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
try:
|
||||
await get_qdrant_client() # Triggers collection creation if needed
|
||||
logger.info("Qdrant collection ready")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize Qdrant collection: {e}")
|
||||
raise RuntimeError(
|
||||
f"Cannot start vector sync - Qdrant initialization failed: {e}"
|
||||
) from e
|
||||
|
||||
# Initialize shared state
|
||||
send_stream, receive_stream = anyio.create_memory_object_stream(
|
||||
max_buffer_size=settings.vector_sync_queue_max_size
|
||||
@@ -1086,6 +1099,19 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
# Create client since we're outside FastMCP lifespan
|
||||
client = NextcloudClient.from_env()
|
||||
|
||||
# Initialize Qdrant collection before starting background tasks
|
||||
logger.info("Initializing Qdrant collection...")
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
try:
|
||||
await get_qdrant_client() # Triggers collection creation if needed
|
||||
logger.info("Qdrant collection ready")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to initialize Qdrant collection: {e}")
|
||||
raise RuntimeError(
|
||||
f"Cannot start vector sync - Qdrant initialization failed: {e}"
|
||||
) from e
|
||||
|
||||
# Initialize shared state
|
||||
send_stream, receive_stream = anyio_module.create_memory_object_stream(
|
||||
max_buffer_size=settings.vector_sync_queue_max_size
|
||||
|
||||
@@ -17,6 +17,7 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
|
||||
base_url: str,
|
||||
model: str = "nomic-embed-text",
|
||||
verify_ssl: bool = True,
|
||||
timeout=httpx.Timeout(timeout=120, connect=5),
|
||||
):
|
||||
"""
|
||||
Initialize Ollama embedding provider.
|
||||
@@ -29,8 +30,8 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.model = model
|
||||
self.verify_ssl = verify_ssl
|
||||
self.client = httpx.AsyncClient(verify=verify_ssl, timeout=30.0)
|
||||
self._dimension = 768 # nomic-embed-text default
|
||||
self.client = httpx.AsyncClient(verify=verify_ssl, timeout=timeout)
|
||||
self._dimension: int | None = None # Will be detected dynamically
|
||||
logger.info(
|
||||
f"Initialized Ollama provider: {base_url} (model={model}, verify_ssl={verify_ssl})"
|
||||
)
|
||||
@@ -73,13 +74,36 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
|
||||
embeddings.append(embedding)
|
||||
return embeddings
|
||||
|
||||
async def _detect_dimension(self):
    """
    Detect embedding dimension by generating a test embedding.

    Embeds the string "test" once via ``self.embed`` and caches the length
    of the returned vector in ``self._dimension``. If a dimension is already
    cached, the call returns immediately without contacting the model.

    Raises:
        RuntimeError: If the model returns an empty embedding. Without this
            check a dimension of 0 would be cached and never re-detected.
    """
    if self._dimension is not None:
        return

    logger.debug(f"Detecting embedding dimension for model {self.model}...")
    test_embedding = await self.embed("test")

    # Compare the length rather than truthiness so array-like results are safe.
    dimension = len(test_embedding)
    if dimension == 0:
        raise RuntimeError(
            f"Model {self.model} returned an empty embedding; "
            "cannot detect embedding dimension"
        )

    self._dimension = dimension
    logger.info(
        f"Detected embedding dimension: {self._dimension} for model {self.model}"
    )
|
||||
|
||||
def get_dimension(self) -> int:
    """
    Get embedding dimension.

    Returns:
        Vector dimension for the configured model

    Raises:
        RuntimeError: If dimension not detected yet (call _detect_dimension first)
    """
    # The dimension is no longer hardcoded; it stays None until detection runs.
    if self._dimension is None:
        raise RuntimeError(
            f"Embedding dimension not detected yet for model {self.model}. "
            "Call _detect_dimension() first or generate an embedding."
        )
    return self._dimension
|
||||
|
||||
def _check_model_is_loaded(self, autoload: bool = True):
|
||||
|
||||
@@ -66,10 +66,23 @@ async def get_qdrant_client() -> AsyncQdrantClient:
|
||||
from nextcloud_mcp_server.embedding import get_embedding_service
|
||||
|
||||
embedding_service = get_embedding_service()
|
||||
|
||||
# Detect dimension dynamically (for OllamaEmbeddingProvider)
|
||||
if hasattr(embedding_service.provider, "_detect_dimension"):
|
||||
await embedding_service.provider._detect_dimension() # type: ignore[call-non-callable]
|
||||
|
||||
expected_dimension = embedding_service.get_dimension()
|
||||
|
||||
try:
|
||||
# Get existing collection
|
||||
# Explicitly check if collection exists
|
||||
logger.debug(f"Checking if collection '{collection_name}' exists...")
|
||||
collections = await _qdrant_client.get_collections()
|
||||
collection_names = [c.name for c in collections.collections]
|
||||
|
||||
if collection_name in collection_names:
|
||||
# Collection exists - validate dimensions
|
||||
logger.debug(
|
||||
f"Collection '{collection_name}' found, validating dimensions..."
|
||||
)
|
||||
collection_info = await _qdrant_client.get_collection(collection_name)
|
||||
actual_dimension = collection_info.config.params.vectors.size
|
||||
|
||||
@@ -91,12 +104,12 @@ async def get_qdrant_client() -> AsyncQdrantClient:
|
||||
f"(dimension={actual_dimension}, model={settings.ollama_embedding_model})"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
# Check if it's a dimension mismatch error (re-raise it)
|
||||
if isinstance(e, ValueError) and "Dimension mismatch" in str(e):
|
||||
raise
|
||||
|
||||
# Collection doesn't exist or other error, create it
|
||||
else:
|
||||
# Collection doesn't exist - create it
|
||||
logger.info(
|
||||
f"Collection '{collection_name}' not found, creating with "
|
||||
f"dimension={expected_dimension}, model={settings.ollama_embedding_model}..."
|
||||
)
|
||||
await _qdrant_client.create_collection(
|
||||
collection_name=collection_name,
|
||||
vectors_config=VectorParams(
|
||||
|
||||
@@ -0,0 +1,322 @@
|
||||
"""Integration tests for Qdrant collection auto-creation.
|
||||
|
||||
These tests validate that:
|
||||
1. Collections are automatically created on first access
|
||||
2. Dimension validation detects mismatches
|
||||
3. Idempotent initialization (multiple calls don't fail)
|
||||
4. Proper error handling and logging
|
||||
"""
|
||||
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
async def reset_singleton():
    """Reset the global Qdrant client singleton between tests.

    Clears ``qdrant_client._qdrant_client`` before each test and puts the
    previous value back afterwards.
    """
    # The singleton lives on the qdrant_client module, so it is read and
    # written as a module attribute; no `global` declaration is needed here.
    import nextcloud_mcp_server.vector.qdrant_client as qdrant_module

    original = qdrant_module._qdrant_client
    qdrant_module._qdrant_client = None

    yield

    qdrant_module._qdrant_client = original
|
||||
|
||||
|
||||
@pytest.mark.integration
async def test_collection_auto_created_on_first_access(monkeypatch):
    """A missing collection is created with the provider's dimension on first access."""
    from nextcloud_mcp_server.config import Settings
    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider

    # Settings stub: `:memory:` location, background sync disabled for the test.
    settings = Settings(
        qdrant_location=":memory:",
        ollama_embedding_model="nomic-embed-text",
        vector_sync_enabled=False,
    )
    monkeypatch.setattr(
        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings
    )

    # The service stand-in must expose `.provider` as well as `.get_dimension()`.
    provider = SimpleEmbeddingProvider(dimension=384)
    service = Mock(provider=provider, get_dimension=provider.get_dimension)
    monkeypatch.setattr(
        "nextcloud_mcp_server.embedding.get_embedding_service",
        lambda: service,
    )

    # First access should create the collection.
    client = await get_qdrant_client()
    assert client is not None

    expected_name = settings.get_collection_name()
    listing = await client.get_collections()
    assert expected_name in [entry.name for entry in listing.collections]

    # The new collection carries the provider's vector size.
    info = await client.get_collection(expected_name)
    assert info.config.params.vectors.size == 384
|
||||
|
||||
|
||||
@pytest.mark.integration
async def test_existing_collection_reused(monkeypatch, tmp_path):
    """Test that existing collection is reused without error."""
    import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
    from nextcloud_mcp_server.config import Settings
    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider

    # Use a persistent temp directory so the collection survives the client
    # reset below. With ":memory:" the second client would presumably start
    # empty and simply re-create the collection, so reuse was never exercised.
    qdrant_path = str(tmp_path / "qdrant_data")
    mock_settings = Settings(
        qdrant_location=qdrant_path,
        ollama_embedding_model="nomic-embed-text",
        vector_sync_enabled=False,
    )
    monkeypatch.setattr(
        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
    )

    # Mock embedding service - must have .provider attribute
    mock_provider = SimpleEmbeddingProvider(dimension=384)
    mock_embedding_service = Mock()
    mock_embedding_service.provider = mock_provider
    mock_embedding_service.get_dimension = mock_provider.get_dimension
    monkeypatch.setattr(
        "nextcloud_mcp_server.embedding.get_embedding_service",
        lambda: mock_embedding_service,
    )

    # First call - creates collection
    client1 = await get_qdrant_client()
    assert client1 is not None
    collection_name = mock_settings.get_collection_name()

    # Close client1 to release the on-disk storage before re-opening it
    await client1.close()

    # Reset singleton to simulate second initialization
    qdrant_module._qdrant_client = None

    # Second call - should find and reuse the existing collection
    client2 = await get_qdrant_client()
    assert client2 is not None

    # Verify collection still exists
    collections = await client2.get_collections()
    collection_names = [c.name for c in collections.collections]
    assert collection_name in collection_names

    # Verify dimensions unchanged
    collection_info = await client2.get_collection(collection_name)
    assert collection_info.config.params.vectors.size == 384
|
||||
|
||||
|
||||
@pytest.mark.integration
async def test_dimension_mismatch_detected(monkeypatch, tmp_path):
    """Re-opening a collection with a different embedding size raises a clear error."""
    import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
    from nextcloud_mcp_server.config import Settings
    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider

    def install_provider(dimension):
        # Patch in an embedding service whose provider reports `dimension`.
        provider = SimpleEmbeddingProvider(dimension=dimension)
        service = Mock(provider=provider, get_dimension=provider.get_dimension)
        monkeypatch.setattr(
            "nextcloud_mcp_server.embedding.get_embedding_service",
            lambda: service,
        )

    # On-disk location so the collection outlives the first client.
    settings = Settings(
        qdrant_location=str(tmp_path / "qdrant_data"),
        ollama_embedding_model="nomic-embed-text",
        vector_sync_enabled=False,
    )
    monkeypatch.setattr(
        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings
    )

    # Round one: the collection is created with 384-dimensional vectors.
    install_provider(384)
    first_client = await get_qdrant_client()
    name = settings.get_collection_name()
    info = await first_client.get_collection(name)
    assert info.config.params.vectors.size == 384

    # Release the file lock, then drop the singleton so the next call re-initializes.
    await first_client.close()
    qdrant_module._qdrant_client = None

    # Round two: same storage, but the provider now reports 768 dimensions.
    install_provider(768)
    with pytest.raises(ValueError) as exc_info:
        await get_qdrant_client()

    # The message should name both dimensions and offer remedies.
    message = str(exc_info.value)
    for fragment in ("Dimension mismatch", "384", "768", "Solutions:"):
        assert fragment in message
|
||||
|
||||
|
||||
@pytest.mark.integration
async def test_idempotent_initialization(monkeypatch):
    """Repeated get_qdrant_client() calls hand back the same singleton instance."""
    from nextcloud_mcp_server.config import Settings
    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider

    settings = Settings(
        qdrant_location=":memory:",
        ollama_embedding_model="nomic-embed-text",
        vector_sync_enabled=False,
    )
    monkeypatch.setattr(
        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings
    )

    # The service stand-in must expose `.provider` as well as `.get_dimension()`.
    provider = SimpleEmbeddingProvider(dimension=384)
    service = Mock(provider=provider, get_dimension=provider.get_dimension)
    monkeypatch.setattr(
        "nextcloud_mcp_server.embedding.get_embedding_service",
        lambda: service,
    )

    # Three consecutive calls.
    first = await get_qdrant_client()
    second = await get_qdrant_client()
    third = await get_qdrant_client()

    # Identity, not mere equality: every call must yield the one shared client.
    assert first is second
    assert second is third

    # The collection is present after the repeated calls.
    expected_name = settings.get_collection_name()
    listing = await first.get_collections()
    assert expected_name in [entry.name for entry in listing.collections]
|
||||
|
||||
|
||||
@pytest.mark.integration
async def test_collection_name_generation(monkeypatch):
    """Test that collection name is correctly generated from deployment ID and model."""
    from nextcloud_mcp_server.config import Settings
    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider

    # Set the deployment ID BEFORE building Settings, so the value is visible
    # whether it is read at construction time or when the name is generated.
    # NOTE(review): confirm which of the two Settings actually does.
    monkeypatch.setenv("MCP_DEPLOYMENT_ID", "test-deployment")

    # Mock settings with custom model name
    mock_settings = Settings(
        qdrant_location=":memory:",
        ollama_embedding_model="test-model",
        vector_sync_enabled=False,
    )
    monkeypatch.setattr(
        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
    )

    # Mock embedding service - must have .provider attribute
    mock_provider = SimpleEmbeddingProvider(dimension=384)
    mock_embedding_service = Mock()
    mock_embedding_service.provider = mock_provider
    mock_embedding_service.get_dimension = mock_provider.get_dimension
    monkeypatch.setattr(
        "nextcloud_mcp_server.embedding.get_embedding_service",
        lambda: mock_embedding_service,
    )

    # Get client
    client = await get_qdrant_client()

    # Verify collection name includes deployment ID or model
    collection_name = mock_settings.get_collection_name()
    assert "test-deployment" in collection_name or "test-model" in collection_name

    # Verify collection was created with that name
    collections = await client.get_collections()
    collection_names = [c.name for c in collections.collections]
    assert collection_name in collection_names
|
||||
|
||||
|
||||
@pytest.mark.integration
async def test_collection_uses_cosine_distance(monkeypatch):
    """The auto-created collection is configured with the COSINE distance metric."""
    from nextcloud_mcp_server.config import Settings
    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider

    settings = Settings(
        qdrant_location=":memory:",
        ollama_embedding_model="nomic-embed-text",
        vector_sync_enabled=False,
    )
    monkeypatch.setattr(
        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings
    )

    # The service stand-in must expose `.provider` as well as `.get_dimension()`.
    provider = SimpleEmbeddingProvider(dimension=384)
    service = Mock(provider=provider, get_dimension=provider.get_dimension)
    monkeypatch.setattr(
        "nextcloud_mcp_server.embedding.get_embedding_service",
        lambda: service,
    )

    # Fetching the client creates the collection.
    client = await get_qdrant_client()
    info = await client.get_collection(settings.get_collection_name())

    from qdrant_client.models import Distance

    assert info.config.params.vectors.distance == Distance.COSINE
|
||||
Reference in New Issue
Block a user