fix: add dynamic dimension detection for Ollama embedding models

This fixes dimension mismatch errors when using embedding models with
non-standard dimensions (e.g., qwen3-embedding:4b produces 2560-dim
vectors instead of the hardcoded 768).

Changes:
- OllamaEmbeddingProvider: Detect dimensions dynamically by generating
  test embedding instead of hardcoding to 768
- qdrant_client: Call dimension detection before collection creation
- app.py: Initialize Qdrant collection before starting background tasks
  in streamable-http transport path
- tests: Fix integration tests to properly mock EmbeddingService wrapper

Fixes dimension mismatch error:
"could not broadcast input array from shape (2560,) into shape (768,)"

All integration tests passing (6/6).

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Chris Coutinho
2025-11-12 02:46:30 +01:00
parent f6656fee06
commit 6812e1aca7
4 changed files with 396 additions and 11 deletions
+26
View File
@@ -418,6 +418,19 @@ async def app_lifespan_basic(server: FastMCP) -> AsyncIterator[AppContext]:
"NEXTCLOUD_USERNAME is required for vector sync in BasicAuth mode"
)
# Initialize Qdrant collection before starting background tasks
logger.info("Initializing Qdrant collection...")
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
try:
await get_qdrant_client() # Triggers collection creation if needed
logger.info("Qdrant collection ready")
except Exception as e:
logger.error(f"Failed to initialize Qdrant collection: {e}")
raise RuntimeError(
f"Cannot start vector sync - Qdrant initialization failed: {e}"
) from e
# Initialize shared state
send_stream, receive_stream = anyio.create_memory_object_stream(
max_buffer_size=settings.vector_sync_queue_max_size
@@ -1086,6 +1099,19 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
# Create client since we're outside FastMCP lifespan
client = NextcloudClient.from_env()
# Initialize Qdrant collection before starting background tasks
logger.info("Initializing Qdrant collection...")
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
try:
await get_qdrant_client() # Triggers collection creation if needed
logger.info("Qdrant collection ready")
except Exception as e:
logger.error(f"Failed to initialize Qdrant collection: {e}")
raise RuntimeError(
f"Cannot start vector sync - Qdrant initialization failed: {e}"
) from e
# Initialize shared state
send_stream, receive_stream = anyio_module.create_memory_object_stream(
max_buffer_size=settings.vector_sync_queue_max_size
@@ -17,6 +17,7 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
base_url: str,
model: str = "nomic-embed-text",
verify_ssl: bool = True,
timeout=httpx.Timeout(timeout=120, connect=5),
):
"""
Initialize Ollama embedding provider.
@@ -29,8 +30,8 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
self.base_url = base_url.rstrip("/")
self.model = model
self.verify_ssl = verify_ssl
self.client = httpx.AsyncClient(verify=verify_ssl, timeout=30.0)
self._dimension = 768 # nomic-embed-text default
self.client = httpx.AsyncClient(verify=verify_ssl, timeout=timeout)
self._dimension: int | None = None # Will be detected dynamically
logger.info(
f"Initialized Ollama provider: {base_url} (model={model}, verify_ssl={verify_ssl})"
)
@@ -73,13 +74,36 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
embeddings.append(embedding)
return embeddings
async def _detect_dimension(self):
"""
Detect embedding dimension by generating a test embedding.
This method queries the model to determine the actual dimension
instead of relying on hardcoded values.
"""
if self._dimension is None:
logger.debug(f"Detecting embedding dimension for model {self.model}...")
test_embedding = await self.embed("test")
self._dimension = len(test_embedding)
logger.info(
f"Detected embedding dimension: {self._dimension} for model {self.model}"
)
def get_dimension(self) -> int:
"""
Get embedding dimension.
Returns:
Vector dimension (768 for nomic-embed-text)
Vector dimension for the configured model
Raises:
RuntimeError: If dimension not detected yet (call _detect_dimension first)
"""
if self._dimension is None:
raise RuntimeError(
f"Embedding dimension not detected yet for model {self.model}. "
"Call _detect_dimension() first or generate an embedding."
)
return self._dimension
def _check_model_is_loaded(self, autoload: bool = True):
+21 -8
View File
@@ -66,10 +66,23 @@ async def get_qdrant_client() -> AsyncQdrantClient:
from nextcloud_mcp_server.embedding import get_embedding_service
embedding_service = get_embedding_service()
# Detect dimension dynamically (for OllamaEmbeddingProvider)
if hasattr(embedding_service.provider, "_detect_dimension"):
await embedding_service.provider._detect_dimension() # type: ignore[call-non-callable]
expected_dimension = embedding_service.get_dimension()
try:
# Get existing collection
# Explicitly check if collection exists
logger.debug(f"Checking if collection '{collection_name}' exists...")
collections = await _qdrant_client.get_collections()
collection_names = [c.name for c in collections.collections]
if collection_name in collection_names:
# Collection exists - validate dimensions
logger.debug(
f"Collection '{collection_name}' found, validating dimensions..."
)
collection_info = await _qdrant_client.get_collection(collection_name)
actual_dimension = collection_info.config.params.vectors.size
@@ -91,12 +104,12 @@ async def get_qdrant_client() -> AsyncQdrantClient:
f"(dimension={actual_dimension}, model={settings.ollama_embedding_model})"
)
except Exception as e:
# Check if it's a dimension mismatch error (re-raise it)
if isinstance(e, ValueError) and "Dimension mismatch" in str(e):
raise
# Collection doesn't exist or other error, create it
else:
# Collection doesn't exist - create it
logger.info(
f"Collection '{collection_name}' not found, creating with "
f"dimension={expected_dimension}, model={settings.ollama_embedding_model}..."
)
await _qdrant_client.create_collection(
collection_name=collection_name,
vectors_config=VectorParams(
@@ -0,0 +1,322 @@
"""Integration tests for Qdrant collection auto-creation.
These tests validate that:
1. Collections are automatically created on first access
2. Dimension validation detects mismatches
3. Idempotent initialization (multiple calls don't fail)
4. Proper error handling and logging
"""
from unittest.mock import Mock
import pytest
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
pytestmark = pytest.mark.integration
@pytest.fixture(autouse=True)
async def reset_singleton():
"""Reset the global Qdrant client singleton between tests."""
global _qdrant_client
import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
# Store original
original = qdrant_module._qdrant_client
# Reset for test
qdrant_module._qdrant_client = None
yield
# Restore original
qdrant_module._qdrant_client = original
@pytest.mark.integration
async def test_collection_auto_created_on_first_access(monkeypatch):
"""Test that collection is automatically created if it doesn't exist."""
# Mock settings
from nextcloud_mcp_server.config import Settings
mock_settings = Settings(
qdrant_location=":memory:",
ollama_embedding_model="nomic-embed-text",
vector_sync_enabled=False, # Disable background sync for test
)
monkeypatch.setattr(
"nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
)
# Mock embedding service - must have .provider attribute
from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
mock_provider = SimpleEmbeddingProvider(dimension=384)
mock_embedding_service = Mock()
mock_embedding_service.provider = mock_provider
mock_embedding_service.get_dimension = lambda: mock_provider.get_dimension()
monkeypatch.setattr(
"nextcloud_mcp_server.embedding.get_embedding_service",
lambda: mock_embedding_service,
)
# Get client (should trigger collection creation)
client = await get_qdrant_client()
# Verify client is initialized
assert client is not None
# Verify collection was created
collection_name = mock_settings.get_collection_name()
collections = await client.get_collections()
collection_names = [c.name for c in collections.collections]
assert collection_name in collection_names
# Verify collection has correct dimensions
collection_info = await client.get_collection(collection_name)
assert collection_info.config.params.vectors.size == 384
@pytest.mark.integration
async def test_existing_collection_reused(monkeypatch):
"""Test that existing collection is reused without error."""
# Mock settings
from nextcloud_mcp_server.config import Settings
mock_settings = Settings(
qdrant_location=":memory:",
ollama_embedding_model="nomic-embed-text",
vector_sync_enabled=False,
)
monkeypatch.setattr(
"nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
)
# Mock embedding service - must have .provider attribute
from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
mock_provider = SimpleEmbeddingProvider(dimension=384)
mock_embedding_service = Mock()
mock_embedding_service.provider = mock_provider
mock_embedding_service.get_dimension = lambda: mock_provider.get_dimension()
monkeypatch.setattr(
"nextcloud_mcp_server.embedding.get_embedding_service",
lambda: mock_embedding_service,
)
# First call - creates collection
_ = await get_qdrant_client()
collection_name = mock_settings.get_collection_name()
# Reset singleton to simulate second initialization
import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
qdrant_module._qdrant_client = None
# Second call - should reuse existing collection
client2 = await get_qdrant_client()
# Verify both clients work
assert client2 is not None
# Verify collection still exists and wasn't recreated
collections = await client2.get_collections()
collection_names = [c.name for c in collections.collections]
assert collection_name in collection_names
# Verify dimensions unchanged
collection_info = await client2.get_collection(collection_name)
assert collection_info.config.params.vectors.size == 384
@pytest.mark.integration
async def test_dimension_mismatch_detected(monkeypatch, tmp_path):
"""Test that dimension mismatch raises clear error."""
# Use persistent temp directory so collection survives client reset
from nextcloud_mcp_server.config import Settings
qdrant_path = str(tmp_path / "qdrant_data")
mock_settings = Settings(
qdrant_location=qdrant_path,
ollama_embedding_model="nomic-embed-text",
vector_sync_enabled=False,
)
monkeypatch.setattr(
"nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
)
# First embedding service: 384 dimensions
from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
mock_provider_1 = SimpleEmbeddingProvider(dimension=384)
mock_embedding_service_1 = Mock()
mock_embedding_service_1.provider = mock_provider_1
mock_embedding_service_1.get_dimension = lambda: mock_provider_1.get_dimension()
monkeypatch.setattr(
"nextcloud_mcp_server.embedding.get_embedding_service",
lambda: mock_embedding_service_1,
)
# First call - creates collection with 384 dimensions
client1 = await get_qdrant_client()
collection_name = mock_settings.get_collection_name()
# Verify collection created
collection_info = await client1.get_collection(collection_name)
assert collection_info.config.params.vectors.size == 384
# Close client1 to release file lock
await client1.close()
# Reset singleton (but collection persists in temp directory)
import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
qdrant_module._qdrant_client = None
# Change embedding service to different dimension (768)
mock_provider_2 = SimpleEmbeddingProvider(dimension=768)
mock_embedding_service_2 = Mock()
mock_embedding_service_2.provider = mock_provider_2
mock_embedding_service_2.get_dimension = lambda: mock_provider_2.get_dimension()
monkeypatch.setattr(
"nextcloud_mcp_server.embedding.get_embedding_service",
lambda: mock_embedding_service_2,
)
# Second call - should detect dimension mismatch and raise error
with pytest.raises(ValueError) as exc_info:
await get_qdrant_client()
# Verify error message is helpful
error_msg = str(exc_info.value)
assert "Dimension mismatch" in error_msg
assert "384" in error_msg # Old dimension
assert "768" in error_msg # New dimension
assert "Solutions:" in error_msg # Includes helpful solutions
@pytest.mark.integration
async def test_idempotent_initialization(monkeypatch):
"""Test that multiple calls to get_qdrant_client() are idempotent."""
# Mock settings
from nextcloud_mcp_server.config import Settings
mock_settings = Settings(
qdrant_location=":memory:",
ollama_embedding_model="nomic-embed-text",
vector_sync_enabled=False,
)
monkeypatch.setattr(
"nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
)
# Mock embedding service - must have .provider attribute
from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
mock_provider = SimpleEmbeddingProvider(dimension=384)
mock_embedding_service = Mock()
mock_embedding_service.provider = mock_provider
mock_embedding_service.get_dimension = lambda: mock_provider.get_dimension()
monkeypatch.setattr(
"nextcloud_mcp_server.embedding.get_embedding_service",
lambda: mock_embedding_service,
)
# Call multiple times
client1 = await get_qdrant_client()
client2 = await get_qdrant_client()
client3 = await get_qdrant_client()
# All should return same singleton instance
assert client1 is client2
assert client2 is client3
# Collection should exist
collection_name = mock_settings.get_collection_name()
collections = await client1.get_collections()
collection_names = [c.name for c in collections.collections]
assert collection_name in collection_names
@pytest.mark.integration
async def test_collection_name_generation(monkeypatch):
"""Test that collection name is correctly generated from deployment ID and model."""
# Mock settings with custom deployment ID
from nextcloud_mcp_server.config import Settings
mock_settings = Settings(
qdrant_location=":memory:",
ollama_embedding_model="test-model",
vector_sync_enabled=False,
)
# Mock deployment ID
monkeypatch.setenv("MCP_DEPLOYMENT_ID", "test-deployment")
monkeypatch.setattr(
"nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
)
# Mock embedding service - must have .provider attribute
from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
mock_provider = SimpleEmbeddingProvider(dimension=384)
mock_embedding_service = Mock()
mock_embedding_service.provider = mock_provider
mock_embedding_service.get_dimension = lambda: mock_provider.get_dimension()
monkeypatch.setattr(
"nextcloud_mcp_server.embedding.get_embedding_service",
lambda: mock_embedding_service,
)
# Get client
client = await get_qdrant_client()
# Verify collection name includes deployment ID and model
collection_name = mock_settings.get_collection_name()
assert "test-deployment" in collection_name or "test-model" in collection_name
# Verify collection was created with that name
collections = await client.get_collections()
collection_names = [c.name for c in collections.collections]
assert collection_name in collection_names
@pytest.mark.integration
async def test_collection_uses_cosine_distance(monkeypatch):
"""Test that created collection uses COSINE distance metric."""
# Mock settings
from nextcloud_mcp_server.config import Settings
mock_settings = Settings(
qdrant_location=":memory:",
ollama_embedding_model="nomic-embed-text",
vector_sync_enabled=False,
)
monkeypatch.setattr(
"nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: mock_settings
)
# Mock embedding service - must have .provider attribute
from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
mock_provider = SimpleEmbeddingProvider(dimension=384)
mock_embedding_service = Mock()
mock_embedding_service.provider = mock_provider
mock_embedding_service.get_dimension = lambda: mock_provider.get_dimension()
monkeypatch.setattr(
"nextcloud_mcp_server.embedding.get_embedding_service",
lambda: mock_embedding_service,
)
# Get client (creates collection)
client = await get_qdrant_client()
# Verify collection uses COSINE distance
collection_name = mock_settings.get_collection_name()
collection_info = await client.get_collection(collection_name)
from qdrant_client.models import Distance
assert collection_info.config.params.vectors.distance == Distance.COSINE