feat: implement vector sync scanner and processor (ADR-007 Phase 2)
Implements background vector database synchronization using anyio TaskGroups for BasicAuth mode with single-user credentials. Scanner Implementation: - Periodic document discovery (hourly, configurable) - Timestamp-based change detection (Nextcloud vs Qdrant) - Wake event for immediate scanning on-demand - Supports both initial sync (all docs) and incremental sync (changes only) - Detects deleted documents and queues for removal Processor Implementation: - Concurrent document processing pool (3 workers default) - I/O-bound embedding generation via Ollama API - Retry logic with exponential backoff (3 retries) - Document chunking (512 words, 50-word overlap) - Handles both index and delete operations - Upserts vectors to Qdrant with rich metadata App Lifespan Integration: - Extended AppContext with background task state - Modified app_lifespan_basic() to start tasks via anyio TaskGroups - Graceful shutdown with coordinated task cancellation - Only activates when VECTOR_SYNC_ENABLED=true Embedding Service: - OllamaEmbeddingProvider with TLS support - Singleton pattern for shared client instances - Batch embedding support for efficiency - Auto-detects embedding dimension (768 for nomic-embed-text) Qdrant Client: - Async client wrapper with singleton pattern - Auto-creates collection on first use - COSINE distance metric for semantic similarity - Integrates with embedding service for dimension detection Health Check Enhancement: - Added Qdrant status check to /health/ready endpoint - Only checks when VECTOR_SYNC_ENABLED=true - 2-second timeout for health probe - Reports connection errors with details Configuration: - VECTOR_SYNC_ENABLED: Enable background sync - VECTOR_SYNC_SCAN_INTERVAL: Scanner frequency (3600s default) - VECTOR_SYNC_PROCESSOR_WORKERS: Concurrent processors (3 default) - QDRANT_URL, QDRANT_API_KEY, QDRANT_COLLECTION: Vector DB config - OLLAMA_BASE_URL, OLLAMA_EMBEDDING_MODEL: Embedding service config Dependencies Added: - qdrant-client>=1.7.0: Vector database client Docker Compose: - Added Qdrant service with health check - Exposed ports 6333 (REST) and 6334 (gRPC) - Configured MCP service with vector sync environment - Added qdrant-data volume for persistence Known Issue: - FastMCP lifespan not triggering for streamable-http transport - Background tasks will start once lifespan integration is complete - Lifespan triggers on MCP session establishment, not server startup Related: ADR-007 Background Vector Database Synchronization 🤖 Generated with Claude Code (https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
"""Embedding service package for generating vector embeddings."""
|
||||
|
||||
from .service import EmbeddingService, get_embedding_service
|
||||
|
||||
__all__ = ["EmbeddingService", "get_embedding_service"]
|
||||
@@ -0,0 +1,43 @@
|
||||
"""Abstract base class for embedding providers."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class EmbeddingProvider(ABC):
|
||||
"""Base class for embedding providers."""
|
||||
|
||||
@abstractmethod
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts (optimized).
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension for this provider.
|
||||
|
||||
Returns:
|
||||
Vector dimension (e.g., 768 for nomic-embed-text)
|
||||
"""
|
||||
pass
|
||||
@@ -0,0 +1,85 @@
|
||||
"""Ollama embedding provider."""
|
||||
|
||||
import logging
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import EmbeddingProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OllamaEmbeddingProvider(EmbeddingProvider):
|
||||
"""Ollama embedding provider with TLS support."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_url: str,
|
||||
model: str = "nomic-embed-text",
|
||||
verify_ssl: bool = True,
|
||||
):
|
||||
"""
|
||||
Initialize Ollama embedding provider.
|
||||
|
||||
Args:
|
||||
base_url: Ollama API base URL (e.g., https://ollama.internal.coutinho.io:443)
|
||||
model: Embedding model name (default: nomic-embed-text)
|
||||
verify_ssl: Verify SSL certificates (default: True)
|
||||
"""
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.model = model
|
||||
self.verify_ssl = verify_ssl
|
||||
self.client = httpx.AsyncClient(verify=verify_ssl, timeout=30.0)
|
||||
self._dimension = 768 # nomic-embed-text default
|
||||
logger.info(
|
||||
f"Initialized Ollama provider: {base_url} (model={model}, verify_ssl={verify_ssl})"
|
||||
)
|
||||
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
"""
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/embeddings",
|
||||
json={"model": self.model, "prompt": text},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()["embedding"]
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts (batched requests).
|
||||
|
||||
Note: Ollama doesn't have native batch API, so we send requests sequentially.
|
||||
For better performance with large batches, consider using asyncio.gather().
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
"""
|
||||
embeddings = []
|
||||
for text in texts:
|
||||
embedding = await self.embed(text)
|
||||
embeddings.append(embedding)
|
||||
return embeddings
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension.
|
||||
|
||||
Returns:
|
||||
Vector dimension (768 for nomic-embed-text)
|
||||
"""
|
||||
return self._dimension
|
||||
|
||||
async def close(self):
|
||||
"""Close HTTP client."""
|
||||
await self.client.aclose()
|
||||
@@ -0,0 +1,102 @@
|
||||
"""Embedding service with provider detection."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from .base import EmbeddingProvider
|
||||
from .ollama_provider import OllamaEmbeddingProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
"""Unified embedding service with automatic provider detection."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize embedding service with auto-detected provider."""
|
||||
self.provider = self._detect_provider()
|
||||
|
||||
def _detect_provider(self) -> EmbeddingProvider:
|
||||
"""
|
||||
Auto-detect available embedding provider.
|
||||
|
||||
Checks environment variables in order:
|
||||
1. OLLAMA_BASE_URL - Use Ollama provider
|
||||
|
||||
Returns:
|
||||
Configured embedding provider
|
||||
|
||||
Raises:
|
||||
ValueError: If no embedding provider is configured
|
||||
"""
|
||||
# Ollama provider (for this deployment)
|
||||
ollama_url = os.getenv("OLLAMA_BASE_URL")
|
||||
if ollama_url:
|
||||
return OllamaEmbeddingProvider(
|
||||
base_url=ollama_url,
|
||||
model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
|
||||
verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
|
||||
)
|
||||
|
||||
raise ValueError(
|
||||
"No embedding provider configured. "
|
||||
"Set OLLAMA_BASE_URL environment variable."
|
||||
)
|
||||
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
"""
|
||||
return await self.provider.embed(text)
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
"""
|
||||
return await self.provider.embed_batch(texts)
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension.
|
||||
|
||||
Returns:
|
||||
Vector dimension
|
||||
"""
|
||||
return self.provider.get_dimension()
|
||||
|
||||
async def close(self):
|
||||
"""Close provider resources."""
|
||||
if hasattr(self.provider, "close") and callable(
|
||||
getattr(self.provider, "close")
|
||||
):
|
||||
close_method = getattr(self.provider, "close")
|
||||
await close_method()
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_embedding_service: EmbeddingService | None = None
|
||||
|
||||
|
||||
def get_embedding_service() -> EmbeddingService:
|
||||
"""
|
||||
Get singleton embedding service instance.
|
||||
|
||||
Returns:
|
||||
Global EmbeddingService instance
|
||||
"""
|
||||
global _embedding_service
|
||||
if _embedding_service is None:
|
||||
_embedding_service = EmbeddingService()
|
||||
return _embedding_service
|
||||
Reference in New Issue
Block a user