feat: add unified provider architecture with Amazon Bedrock support
Refactored LLM provider infrastructure to support sustainable additions of new providers with both embedding and text generation capabilities.
## Major Changes
### Unified Provider Architecture (ADR-015)
- Created `nextcloud_mcp_server/providers/` with unified Provider ABC
- Providers now support optional capabilities (embeddings and/or generation)
- Auto-detection registry with priority: Bedrock → Ollama → Simple
- Backward compatible - existing code continues to work
### New Providers
- **BedrockProvider**: Full Amazon Bedrock integration
- Embeddings: Titan Embed, Cohere Embed models
- Generation: Claude, Llama, Titan Text, Mistral models
- Model-specific request/response handling
- AWS credential chain integration
- **OllamaProvider**: Migrated with both capabilities support
- **AnthropicProvider**: Moved from test code to production providers
- **SimpleProvider**: Migrated in-memory fallback provider
### Breaking Changes
None - full backward compatibility maintained:
- `embedding.get_embedding_service()` still works
- RAG evaluation tests updated to use unified providers
- All existing tests pass (127 unit tests)
### Testing
- Added 9 comprehensive Bedrock unit tests with mocked boto3
- All existing unit tests pass
- Type checking (ty) and linting (ruff) pass
- Verified backward compatibility
### Documentation
- `docs/ADR-015-unified-provider-architecture.md`: Comprehensive ADR
- `docs/bedrock-setup.md`: AWS setup guide with IAM permissions
- `CLAUDE.md`: Updated with provider architecture section
### Dependencies
- Added `boto3>=1.35.0` to dev dependencies (optional)
## Environment Variables
### Bedrock
- `AWS_REGION`: AWS region (e.g., "us-east-1")
- `BEDROCK_EMBEDDING_MODEL`: Model ID for embeddings
- `BEDROCK_GENERATION_MODEL`: Model ID for generation
- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`: Optional credentials
### Ollama
- `OLLAMA_BASE_URL`: API URL
- `OLLAMA_EMBEDDING_MODEL`: Embedding model (default: "nomic-embed-text")
- `OLLAMA_GENERATION_MODEL`: Generation model
## AWS Bedrock Permissions Required
Minimal IAM policy:
```json
{
"Effect": "Allow",
"Action": ["bedrock:InvokeModel"],
"Resource": ["arn:aws:bedrock:*::foundation-model/*"]
}
```
See `docs/bedrock-setup.md` for detailed setup instructions.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,57 +1,30 @@
|
||||
"""Embedding service with provider detection."""
|
||||
"""Embedding service with provider detection.
|
||||
|
||||
DEPRECATED: This module is maintained for backward compatibility.
|
||||
New code should use nextcloud_mcp_server.providers.get_provider() directly.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from .base import EmbeddingProvider
|
||||
from nextcloud_mcp_server.providers import get_provider
|
||||
|
||||
from .bm25_provider import BM25SparseEmbeddingProvider
|
||||
from .ollama_provider import OllamaEmbeddingProvider
|
||||
from .simple_provider import SimpleEmbeddingProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
"""Unified embedding service with automatic provider detection."""
|
||||
"""
|
||||
Unified embedding service with automatic provider detection.
|
||||
|
||||
DEPRECATED: This class wraps the new unified provider infrastructure
|
||||
for backward compatibility. New code should use
|
||||
nextcloud_mcp_server.providers.get_provider() directly.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize embedding service with auto-detected provider."""
|
||||
self.provider = self._detect_provider()
|
||||
|
||||
def _detect_provider(self) -> EmbeddingProvider:
|
||||
"""
|
||||
Auto-detect available embedding provider.
|
||||
|
||||
Checks environment variables in order:
|
||||
1. OLLAMA_BASE_URL - Use Ollama provider (production)
|
||||
2. OPENAI_API_KEY - Use OpenAI provider (future)
|
||||
3. Fallback to SimpleEmbeddingProvider (testing/development)
|
||||
|
||||
Returns:
|
||||
Configured embedding provider
|
||||
"""
|
||||
# Ollama provider (production)
|
||||
ollama_url = os.getenv("OLLAMA_BASE_URL")
|
||||
if ollama_url:
|
||||
logger.info(f"Using Ollama embedding provider: {ollama_url}")
|
||||
return OllamaEmbeddingProvider(
|
||||
base_url=ollama_url,
|
||||
model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
|
||||
verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
|
||||
)
|
||||
|
||||
# OpenAI provider (future implementation)
|
||||
# openai_key = os.getenv("OPENAI_API_KEY")
|
||||
# if openai_key:
|
||||
# return OpenAIEmbeddingProvider(api_key=openai_key)
|
||||
|
||||
# Fallback to simple provider for development/testing
|
||||
logger.warning(
|
||||
"No embedding provider configured (OLLAMA_BASE_URL or OPENAI_API_KEY not set). "
|
||||
"Using SimpleEmbeddingProvider for testing/development. "
|
||||
"For production, configure an external embedding service."
|
||||
)
|
||||
return SimpleEmbeddingProvider(dimension=384)
|
||||
self.provider = get_provider()
|
||||
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
"""Unified provider infrastructure for embeddings and text generation."""
|
||||
|
||||
from .anthropic import AnthropicProvider
|
||||
from .base import Provider
|
||||
from .bedrock import BedrockProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .registry import get_provider, reset_provider
|
||||
from .simple import SimpleProvider
|
||||
|
||||
__all__ = [
|
||||
"Provider",
|
||||
"OllamaProvider",
|
||||
"AnthropicProvider",
|
||||
"SimpleProvider",
|
||||
"BedrockProvider",
|
||||
"get_provider",
|
||||
"reset_provider",
|
||||
]
|
||||
@@ -0,0 +1,97 @@
|
||||
"""Unified Anthropic provider for text generation."""
|
||||
|
||||
import logging
|
||||
|
||||
from anthropic import AsyncAnthropic
|
||||
|
||||
from .base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AnthropicProvider(Provider):
|
||||
"""
|
||||
Anthropic provider for text generation.
|
||||
|
||||
Supports Claude models via the Anthropic API.
|
||||
Note: Anthropic doesn't provide embedding models, only text generation.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"):
|
||||
"""
|
||||
Initialize Anthropic provider.
|
||||
|
||||
Args:
|
||||
api_key: Anthropic API key
|
||||
model: Model name (e.g., "claude-3-5-sonnet-20241022")
|
||||
"""
|
||||
self.client = AsyncAnthropic(api_key=api_key)
|
||||
self.model = model
|
||||
|
||||
logger.info(f"Initialized Anthropic provider (model={model})")
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
"""Whether this provider supports embedding generation."""
|
||||
return False
|
||||
|
||||
@property
|
||||
def supports_generation(self) -> bool:
|
||||
"""Whether this provider supports text generation."""
|
||||
return True
|
||||
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: Anthropic doesn't provide embedding models
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported by Anthropic - use Ollama or Bedrock for embeddings"
|
||||
)
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: Anthropic doesn't provide embedding models
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported by Anthropic - use Ollama or Bedrock for embeddings"
|
||||
)
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: Anthropic doesn't provide embedding models
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported by Anthropic - use Ollama or Bedrock for embeddings"
|
||||
)
|
||||
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""
|
||||
Generate text using Anthropic API.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
"""
|
||||
message = await self.client.messages.create(
|
||||
model=self.model,
|
||||
max_tokens=max_tokens,
|
||||
temperature=0.7,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
return message.content[0].text
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the client (no-op for Anthropic SDK)."""
|
||||
pass
|
||||
@@ -0,0 +1,91 @@
|
||||
"""Unified provider interface for embeddings and text generation."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class Provider(ABC):
|
||||
"""
|
||||
Unified base class for LLM providers.
|
||||
|
||||
Providers can support embeddings, text generation, or both.
|
||||
Use capability properties to determine what features are available.
|
||||
"""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def supports_embeddings(self) -> bool:
|
||||
"""Whether this provider supports embedding generation."""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def supports_generation(self) -> bool:
|
||||
"""Whether this provider supports text generation."""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If provider doesn't support embeddings
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts (optimized).
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If provider doesn't support embeddings
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension for this provider.
|
||||
|
||||
Returns:
|
||||
Vector dimension (e.g., 768 for nomic-embed-text)
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If provider doesn't support embeddings
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""
|
||||
Generate text from a prompt.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If provider doesn't support generation
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def close(self) -> None:
|
||||
"""Close the provider and release resources."""
|
||||
pass
|
||||
@@ -0,0 +1,397 @@
|
||||
"""Amazon Bedrock provider for embeddings and text generation."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import boto3
|
||||
from botocore.exceptions import BotoCoreError, ClientError
|
||||
|
||||
BOTO3_AVAILABLE = True
|
||||
except ImportError:
|
||||
BOTO3_AVAILABLE = False
|
||||
|
||||
from .base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BedrockProvider(Provider):
|
||||
"""
|
||||
Amazon Bedrock provider supporting both embeddings and text generation.
|
||||
|
||||
Uses AWS Bedrock Runtime API with boto3. Supports various model families:
|
||||
- Embeddings: amazon.titan-embed-text-v1, amazon.titan-embed-text-v2, cohere.embed-*
|
||||
- Text Generation: anthropic.claude-*, meta.llama3-*, amazon.titan-text-*, mistral.*, etc.
|
||||
|
||||
Requires AWS credentials configured via:
|
||||
- Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
|
||||
- AWS credentials file (~/.aws/credentials)
|
||||
- IAM role (when running on AWS)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
region_name: str | None = None,
|
||||
embedding_model: str | None = None,
|
||||
generation_model: str | None = None,
|
||||
aws_access_key_id: str | None = None,
|
||||
aws_secret_access_key: str | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize Bedrock provider.
|
||||
|
||||
Args:
|
||||
region_name: AWS region (e.g., "us-east-1"). Defaults to AWS_REGION env var.
|
||||
embedding_model: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0").
|
||||
None disables embeddings.
|
||||
generation_model: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0").
|
||||
None disables generation.
|
||||
aws_access_key_id: AWS access key (optional, uses default credential chain if not provided)
|
||||
aws_secret_access_key: AWS secret key (optional, uses default credential chain if not provided)
|
||||
|
||||
Raises:
|
||||
ImportError: If boto3 is not installed
|
||||
"""
|
||||
if not BOTO3_AVAILABLE:
|
||||
raise ImportError(
|
||||
"boto3 is required for Bedrock provider. Install with: pip install boto3"
|
||||
)
|
||||
|
||||
self.embedding_model = embedding_model
|
||||
self.generation_model = generation_model
|
||||
self._dimension: int | None = None # Detected dynamically
|
||||
|
||||
# Initialize bedrock-runtime client
|
||||
client_kwargs: dict[str, Any] = {}
|
||||
if region_name:
|
||||
client_kwargs["region_name"] = region_name
|
||||
if aws_access_key_id:
|
||||
client_kwargs["aws_access_key_id"] = aws_access_key_id
|
||||
if aws_secret_access_key:
|
||||
client_kwargs["aws_secret_access_key"] = aws_secret_access_key
|
||||
|
||||
self.client = boto3.client("bedrock-runtime", **client_kwargs)
|
||||
|
||||
logger.info(
|
||||
f"Initialized Bedrock provider in region {region_name or 'default'} "
|
||||
f"(embedding_model={embedding_model}, generation_model={generation_model})"
|
||||
)
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
"""Whether this provider supports embedding generation."""
|
||||
return self.embedding_model is not None
|
||||
|
||||
@property
|
||||
def supports_generation(self) -> bool:
|
||||
"""Whether this provider supports text generation."""
|
||||
return self.generation_model is not None
|
||||
|
||||
def _create_embedding_request(self, text: str) -> dict[str, Any]:
|
||||
"""
|
||||
Create model-specific embedding request payload.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Request payload dict for the embedding model
|
||||
"""
|
||||
if not self.embedding_model:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
# Titan Embed models
|
||||
if self.embedding_model.startswith("amazon.titan-embed"):
|
||||
return {"inputText": text}
|
||||
|
||||
# Cohere Embed models
|
||||
elif self.embedding_model.startswith("cohere.embed"):
|
||||
return {"texts": [text], "input_type": "search_document"}
|
||||
|
||||
# Unknown model - try Titan format as default
|
||||
else:
|
||||
logger.warning(
|
||||
f"Unknown embedding model format for {self.embedding_model}, "
|
||||
"using Titan format as default"
|
||||
)
|
||||
return {"inputText": text}
|
||||
|
||||
def _parse_embedding_response(self, response: dict[str, Any]) -> list[float]:
|
||||
"""
|
||||
Parse model-specific embedding response.
|
||||
|
||||
Args:
|
||||
response: Raw response from Bedrock
|
||||
|
||||
Returns:
|
||||
Embedding vector as list of floats
|
||||
"""
|
||||
# Titan Embed models
|
||||
if self.embedding_model and self.embedding_model.startswith(
|
||||
"amazon.titan-embed"
|
||||
):
|
||||
return response["embedding"]
|
||||
|
||||
# Cohere Embed models
|
||||
elif self.embedding_model and self.embedding_model.startswith("cohere.embed"):
|
||||
return response["embeddings"][0]
|
||||
|
||||
# Unknown model - try Titan format as default
|
||||
else:
|
||||
logger.warning(
|
||||
f"Unknown embedding response format for {self.embedding_model}, "
|
||||
"trying Titan format"
|
||||
)
|
||||
return response.get("embedding", response.get("embeddings", [None])[0])
|
||||
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
ClientError: If Bedrock API call fails
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
try:
|
||||
request_body = self._create_embedding_request(text)
|
||||
|
||||
response = self.client.invoke_model(
|
||||
modelId=self.embedding_model,
|
||||
body=json.dumps(request_body),
|
||||
accept="application/json",
|
||||
contentType="application/json",
|
||||
)
|
||||
|
||||
response_body = json.loads(response["body"].read())
|
||||
embedding = self._parse_embedding_response(response_body)
|
||||
|
||||
return embedding
|
||||
|
||||
except (BotoCoreError, ClientError) as e:
|
||||
logger.error(f"Bedrock embedding error: {e}")
|
||||
raise
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts.
|
||||
|
||||
Note: Current implementation sends requests sequentially.
|
||||
Future optimization could use asyncio for concurrent requests.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
ClientError: If Bedrock API call fails
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
embeddings = []
|
||||
for text in texts:
|
||||
embedding = await self.embed(text)
|
||||
embeddings.append(embedding)
|
||||
return embeddings
|
||||
|
||||
async def _detect_dimension(self):
|
||||
"""
|
||||
Detect embedding dimension by generating a test embedding.
|
||||
"""
|
||||
if self._dimension is None and self.supports_embeddings:
|
||||
logger.debug(
|
||||
f"Detecting embedding dimension for model {self.embedding_model}..."
|
||||
)
|
||||
test_embedding = await self.embed("test")
|
||||
self._dimension = len(test_embedding)
|
||||
logger.info(
|
||||
f"Detected embedding dimension: {self._dimension} "
|
||||
f"for model {self.embedding_model}"
|
||||
)
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension.
|
||||
|
||||
Returns:
|
||||
Vector dimension for the configured embedding model
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
RuntimeError: If dimension not detected yet (call _detect_dimension first)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
if self._dimension is None:
|
||||
raise RuntimeError(
|
||||
f"Embedding dimension not detected yet for model {self.embedding_model}. "
|
||||
"Call _detect_dimension() first or generate an embedding."
|
||||
)
|
||||
return self._dimension
|
||||
|
||||
def _create_generation_request(
|
||||
self, prompt: str, max_tokens: int
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
Create model-specific text generation request payload.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Request payload dict for the generation model
|
||||
"""
|
||||
if not self.generation_model:
|
||||
raise NotImplementedError(
|
||||
"Text generation not supported - no generation_model configured"
|
||||
)
|
||||
|
||||
# Anthropic Claude models
|
||||
if self.generation_model.startswith("anthropic.claude"):
|
||||
return {
|
||||
"anthropic_version": "bedrock-2023-05-31",
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": 0.7,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
}
|
||||
|
||||
# Meta Llama models
|
||||
elif self.generation_model.startswith("meta.llama"):
|
||||
return {"prompt": prompt, "max_gen_len": max_tokens, "temperature": 0.7}
|
||||
|
||||
# Amazon Titan Text models
|
||||
elif self.generation_model.startswith("amazon.titan-text"):
|
||||
return {
|
||||
"inputText": prompt,
|
||||
"textGenerationConfig": {
|
||||
"maxTokenCount": max_tokens,
|
||||
"temperature": 0.7,
|
||||
},
|
||||
}
|
||||
|
||||
# Mistral models
|
||||
elif self.generation_model.startswith("mistral"):
|
||||
return {"prompt": prompt, "max_tokens": max_tokens, "temperature": 0.7}
|
||||
|
||||
# Unknown model - try Claude format as default
|
||||
else:
|
||||
logger.warning(
|
||||
f"Unknown generation model format for {self.generation_model}, "
|
||||
"using Claude format as default"
|
||||
)
|
||||
return {
|
||||
"anthropic_version": "bedrock-2023-05-31",
|
||||
"max_tokens": max_tokens,
|
||||
"temperature": 0.7,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
}
|
||||
|
||||
def _parse_generation_response(self, response: dict[str, Any]) -> str:
|
||||
"""
|
||||
Parse model-specific text generation response.
|
||||
|
||||
Args:
|
||||
response: Raw response from Bedrock
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
"""
|
||||
# Anthropic Claude models
|
||||
if self.generation_model and self.generation_model.startswith(
|
||||
"anthropic.claude"
|
||||
):
|
||||
return response["content"][0]["text"]
|
||||
|
||||
# Meta Llama models
|
||||
elif self.generation_model and self.generation_model.startswith("meta.llama"):
|
||||
return response["generation"]
|
||||
|
||||
# Amazon Titan Text models
|
||||
elif self.generation_model and self.generation_model.startswith(
|
||||
"amazon.titan-text"
|
||||
):
|
||||
return response["results"][0]["outputText"]
|
||||
|
||||
# Mistral models
|
||||
elif self.generation_model and self.generation_model.startswith("mistral"):
|
||||
return response["outputs"][0]["text"]
|
||||
|
||||
# Unknown model - try common response fields
|
||||
else:
|
||||
logger.warning(
|
||||
f"Unknown generation response format for {self.generation_model}, "
|
||||
"trying common fields"
|
||||
)
|
||||
# Try common response field names
|
||||
for field in ["text", "generation", "outputText", "completion"]:
|
||||
if field in response:
|
||||
return response[field]
|
||||
# Last resort: return JSON string
|
||||
return json.dumps(response)
|
||||
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""
|
||||
Generate text from a prompt.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If generation not enabled (no generation_model)
|
||||
ClientError: If Bedrock API call fails
|
||||
"""
|
||||
if not self.supports_generation:
|
||||
raise NotImplementedError(
|
||||
"Text generation not supported - no generation_model configured"
|
||||
)
|
||||
|
||||
try:
|
||||
request_body = self._create_generation_request(prompt, max_tokens)
|
||||
|
||||
response = self.client.invoke_model(
|
||||
modelId=self.generation_model,
|
||||
body=json.dumps(request_body),
|
||||
accept="application/json",
|
||||
contentType="application/json",
|
||||
)
|
||||
|
||||
response_body = json.loads(response["body"].read())
|
||||
text = self._parse_generation_response(response_body)
|
||||
|
||||
return text
|
||||
|
||||
except (BotoCoreError, ClientError) as e:
|
||||
logger.error(f"Bedrock generation error: {e}")
|
||||
raise
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the client (no-op for boto3 clients)."""
|
||||
pass
|
||||
@@ -0,0 +1,221 @@
|
||||
"""Unified Ollama provider for embeddings and text generation."""
|
||||
|
||||
import logging
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OllamaProvider(Provider):
|
||||
"""
|
||||
Ollama provider supporting both embeddings and text generation.
|
||||
|
||||
Supports TLS, SSL verification, and automatic model loading.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_url: str,
|
||||
embedding_model: str | None = None,
|
||||
generation_model: str | None = None,
|
||||
verify_ssl: bool = True,
|
||||
timeout: httpx.Timeout | None = None,
|
||||
):
|
||||
"""
|
||||
Initialize Ollama provider.
|
||||
|
||||
Args:
|
||||
base_url: Ollama API base URL (e.g., https://ollama.internal.example.com:443)
|
||||
embedding_model: Model for embeddings (e.g., "nomic-embed-text"). None disables embeddings.
|
||||
generation_model: Model for text generation (e.g., "llama3.2:1b"). None disables generation.
|
||||
verify_ssl: Verify SSL certificates (default: True)
|
||||
timeout: HTTP timeout configuration
|
||||
"""
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.embedding_model = embedding_model
|
||||
self.generation_model = generation_model
|
||||
self.verify_ssl = verify_ssl
|
||||
|
||||
if timeout is None:
|
||||
timeout = httpx.Timeout(timeout=120, connect=5)
|
||||
|
||||
self.client = httpx.AsyncClient(verify=verify_ssl, timeout=timeout)
|
||||
self._dimension: int | None = None # Detected dynamically for embeddings
|
||||
|
||||
logger.info(
|
||||
f"Initialized Ollama provider: {base_url} "
|
||||
f"(embedding_model={embedding_model}, generation_model={generation_model}, "
|
||||
f"verify_ssl={verify_ssl})"
|
||||
)
|
||||
|
||||
# Pre-check and auto-load models
|
||||
if embedding_model:
|
||||
self._check_model_is_loaded(embedding_model, autoload=True)
|
||||
if generation_model:
|
||||
self._check_model_is_loaded(generation_model, autoload=True)
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
"""Whether this provider supports embedding generation."""
|
||||
return self.embedding_model is not None
|
||||
|
||||
@property
|
||||
def supports_generation(self) -> bool:
|
||||
"""Whether this provider supports text generation."""
|
||||
return self.generation_model is not None
|
||||
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/embeddings",
|
||||
json={"model": self.embedding_model, "prompt": text},
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()["embedding"]
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts (batched requests).
|
||||
|
||||
Note: Ollama doesn't have native batch API, so we send requests sequentially.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
embeddings = []
|
||||
for text in texts:
|
||||
embedding = await self.embed(text)
|
||||
embeddings.append(embedding)
|
||||
return embeddings
|
||||
|
||||
async def _detect_dimension(self):
|
||||
"""
|
||||
Detect embedding dimension by generating a test embedding.
|
||||
|
||||
This method queries the model to determine the actual dimension
|
||||
instead of relying on hardcoded values.
|
||||
"""
|
||||
if self._dimension is None and self.supports_embeddings:
|
||||
logger.debug(
|
||||
f"Detecting embedding dimension for model {self.embedding_model}..."
|
||||
)
|
||||
test_embedding = await self.embed("test")
|
||||
self._dimension = len(test_embedding)
|
||||
logger.info(
|
||||
f"Detected embedding dimension: {self._dimension} "
|
||||
f"for model {self.embedding_model}"
|
||||
)
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension.
|
||||
|
||||
Returns:
|
||||
Vector dimension for the configured embedding model
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
RuntimeError: If dimension not detected yet (call _detect_dimension first)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
if self._dimension is None:
|
||||
raise RuntimeError(
|
||||
f"Embedding dimension not detected yet for model {self.embedding_model}. "
|
||||
"Call _detect_dimension() first or generate an embedding."
|
||||
)
|
||||
return self._dimension
|
||||
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""
|
||||
Generate text from a prompt.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If generation not enabled (no generation_model)
|
||||
"""
|
||||
if not self.supports_generation:
|
||||
raise NotImplementedError(
|
||||
"Text generation not supported - no generation_model configured"
|
||||
)
|
||||
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/generate",
|
||||
json={
|
||||
"model": self.generation_model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"num_predict": max_tokens,
|
||||
"temperature": 0.7,
|
||||
},
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data["response"]
|
||||
|
||||
def _check_model_is_loaded(self, model: str, autoload: bool = True):
|
||||
"""
|
||||
Check if model is loaded in Ollama, optionally auto-loading it.
|
||||
|
||||
Args:
|
||||
model: Model name to check
|
||||
autoload: Whether to automatically pull the model if not loaded
|
||||
"""
|
||||
response = httpx.get(f"{self.base_url}/api/tags")
|
||||
response.raise_for_status()
|
||||
|
||||
models = [m["name"] for m in response.json().get("models", [])]
|
||||
logger.info("Ollama has following models pre-loaded: %s", models)
|
||||
|
||||
if (model not in models) and autoload:
|
||||
logger.warning(
|
||||
"Model '%s' not yet available in ollama, attempting to pull now...",
|
||||
model,
|
||||
)
|
||||
response = httpx.post(f"{self.base_url}/api/pull", json={"model": model})
|
||||
response.raise_for_status()
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close HTTP client."""
|
||||
await self.client.aclose()
|
||||
@@ -0,0 +1,126 @@
|
||||
"""Provider registry and factory for auto-detection and instantiation."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
from .base import Provider
|
||||
from .bedrock import BedrockProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .simple import SimpleProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProviderRegistry:
|
||||
"""
|
||||
Registry for provider auto-detection and instantiation.
|
||||
|
||||
Checks environment variables in priority order and creates appropriate provider:
|
||||
1. Bedrock (AWS_REGION + BEDROCK_*_MODEL)
|
||||
2. Ollama (OLLAMA_BASE_URL)
|
||||
3. Simple (fallback for testing/development)
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def create_provider() -> Provider:
|
||||
"""
|
||||
Auto-detect and create provider based on environment variables.
|
||||
|
||||
Priority order:
|
||||
1. Bedrock - if AWS_REGION or BEDROCK_EMBEDDING_MODEL is set
|
||||
2. Ollama - if OLLAMA_BASE_URL is set
|
||||
3. Simple - fallback for testing/development
|
||||
|
||||
Returns:
|
||||
Provider instance
|
||||
|
||||
Environment Variables:
|
||||
Bedrock:
|
||||
- AWS_REGION: AWS region (e.g., "us-east-1")
|
||||
- AWS_ACCESS_KEY_ID: AWS access key (optional, uses credential chain)
|
||||
- AWS_SECRET_ACCESS_KEY: AWS secret key (optional)
|
||||
- BEDROCK_EMBEDDING_MODEL: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
|
||||
- BEDROCK_GENERATION_MODEL: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
|
||||
|
||||
Ollama:
|
||||
- OLLAMA_BASE_URL: Ollama API base URL (e.g., "http://localhost:11434")
|
||||
- OLLAMA_EMBEDDING_MODEL: Model for embeddings (default: "nomic-embed-text")
|
||||
- OLLAMA_GENERATION_MODEL: Model for text generation (e.g., "llama3.2:1b")
|
||||
- OLLAMA_VERIFY_SSL: Verify SSL certificates (default: "true")
|
||||
|
||||
Simple (no configuration needed, fallback):
|
||||
- SIMPLE_EMBEDDING_DIMENSION: Embedding dimension (default: 384)
|
||||
"""
|
||||
# 1. Check for Bedrock
|
||||
aws_region = os.getenv("AWS_REGION")
|
||||
bedrock_embedding_model = os.getenv("BEDROCK_EMBEDDING_MODEL")
|
||||
bedrock_generation_model = os.getenv("BEDROCK_GENERATION_MODEL")
|
||||
|
||||
if aws_region or bedrock_embedding_model or bedrock_generation_model:
|
||||
logger.info(
|
||||
f"Using Bedrock provider: region={aws_region}, "
|
||||
f"embedding_model={bedrock_embedding_model}, "
|
||||
f"generation_model={bedrock_generation_model}"
|
||||
)
|
||||
return BedrockProvider(
|
||||
region_name=aws_region,
|
||||
embedding_model=bedrock_embedding_model,
|
||||
generation_model=bedrock_generation_model,
|
||||
aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
|
||||
aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
)
|
||||
|
||||
# 2. Check for Ollama
|
||||
ollama_url = os.getenv("OLLAMA_BASE_URL")
|
||||
if ollama_url:
|
||||
embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
|
||||
generation_model = os.getenv("OLLAMA_GENERATION_MODEL")
|
||||
verify_ssl = os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true"
|
||||
|
||||
logger.info(
|
||||
f"Using Ollama provider: {ollama_url}, "
|
||||
f"embedding_model={embedding_model}, "
|
||||
f"generation_model={generation_model}"
|
||||
)
|
||||
return OllamaProvider(
|
||||
base_url=ollama_url,
|
||||
embedding_model=embedding_model,
|
||||
generation_model=generation_model,
|
||||
verify_ssl=verify_ssl,
|
||||
)
|
||||
|
||||
# 3. Fallback to Simple provider for development/testing
|
||||
dimension = int(os.getenv("SIMPLE_EMBEDDING_DIMENSION", "384"))
|
||||
logger.warning(
|
||||
"No provider configured (AWS_REGION, OLLAMA_BASE_URL not set). "
|
||||
"Using SimpleProvider for testing/development. "
|
||||
"For production, configure Bedrock or Ollama."
|
||||
)
|
||||
return SimpleProvider(dimension=dimension)
|
||||
|
||||
|
||||
# Singleton instance
|
||||
_provider: Provider | None = None
|
||||
|
||||
|
||||
def get_provider() -> Provider:
|
||||
"""
|
||||
Get singleton provider instance.
|
||||
|
||||
Returns:
|
||||
Global Provider instance (auto-detected on first call)
|
||||
"""
|
||||
global _provider
|
||||
if _provider is None:
|
||||
_provider = ProviderRegistry.create_provider()
|
||||
return _provider
|
||||
|
||||
|
||||
def reset_provider():
|
||||
"""
|
||||
Reset singleton provider instance.
|
||||
|
||||
Useful for testing or reconfiguration.
|
||||
"""
|
||||
global _provider
|
||||
_provider = None
|
||||
@@ -0,0 +1,149 @@
|
||||
"""Simple in-process embedding provider for testing.
|
||||
|
||||
This provider uses a basic TF-IDF-like approach with feature hashing to generate
|
||||
deterministic embeddings without requiring external services. Suitable for testing
|
||||
but not for production use.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import math
|
||||
import re
|
||||
from collections import Counter
|
||||
|
||||
from .base import Provider
|
||||
|
||||
|
||||
class SimpleProvider(Provider):
|
||||
"""Simple deterministic embedding provider using feature hashing.
|
||||
|
||||
This implementation:
|
||||
- Tokenizes text into words
|
||||
- Uses feature hashing to map words to fixed-size vectors
|
||||
- Applies TF-IDF-like weighting
|
||||
- Normalizes vectors to unit length
|
||||
|
||||
Not suitable for production but good for testing semantic search infrastructure.
|
||||
Only supports embeddings, not text generation.
|
||||
"""
|
||||
|
||||
def __init__(self, dimension: int = 384):
|
||||
"""Initialize simple embedding provider.
|
||||
|
||||
Args:
|
||||
dimension: Embedding dimension (default: 384)
|
||||
"""
|
||||
self.dimension = dimension
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
"""Whether this provider supports embedding generation."""
|
||||
return True
|
||||
|
||||
@property
|
||||
def supports_generation(self) -> bool:
|
||||
"""Whether this provider supports text generation."""
|
||||
return False
|
||||
|
||||
def _tokenize(self, text: str) -> list[str]:
|
||||
"""Tokenize text into lowercase words.
|
||||
|
||||
Args:
|
||||
text: Input text
|
||||
|
||||
Returns:
|
||||
List of lowercase word tokens
|
||||
"""
|
||||
# Simple word tokenization
|
||||
text = text.lower()
|
||||
words = re.findall(r"\b\w+\b", text)
|
||||
return words
|
||||
|
||||
def _hash_word(self, word: str) -> int:
|
||||
"""Hash word to dimension index.
|
||||
|
||||
Args:
|
||||
word: Word to hash
|
||||
|
||||
Returns:
|
||||
Index in range [0, dimension)
|
||||
"""
|
||||
hash_bytes = hashlib.md5(word.encode()).digest()
|
||||
hash_int = int.from_bytes(hash_bytes[:4], byteorder="big")
|
||||
return hash_int % self.dimension
|
||||
|
||||
def _embed_single(self, text: str) -> list[float]:
|
||||
"""Generate embedding for single text.
|
||||
|
||||
Args:
|
||||
text: Input text
|
||||
|
||||
Returns:
|
||||
Normalized embedding vector
|
||||
"""
|
||||
tokens = self._tokenize(text)
|
||||
if not tokens:
|
||||
return [0.0] * self.dimension
|
||||
|
||||
# Count term frequencies
|
||||
term_freq = Counter(tokens)
|
||||
|
||||
# Initialize vector
|
||||
vector = [0.0] * self.dimension
|
||||
|
||||
# Apply TF weighting with feature hashing
|
||||
for word, count in term_freq.items():
|
||||
idx = self._hash_word(word)
|
||||
# Simple TF weighting: log(1 + count)
|
||||
vector[idx] += math.log1p(count)
|
||||
|
||||
# Normalize to unit length
|
||||
norm = math.sqrt(sum(x * x for x in vector))
|
||||
if norm > 0:
|
||||
vector = [x / norm for x in vector]
|
||||
|
||||
return vector
|
||||
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
"""
|
||||
return self._embed_single(text)
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""Generate embeddings for multiple texts.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
"""
|
||||
return [self._embed_single(text) for text in texts]
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""Get embedding dimension.
|
||||
|
||||
Returns:
|
||||
Vector dimension
|
||||
"""
|
||||
return self.dimension
|
||||
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""
|
||||
Generate text from a prompt.
|
||||
|
||||
Raises:
|
||||
NotImplementedError: Simple provider doesn't support text generation
|
||||
"""
|
||||
raise NotImplementedError(
|
||||
"Text generation not supported by Simple provider - use Ollama, Anthropic, or Bedrock"
|
||||
)
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the provider (no-op for simple provider)."""
|
||||
pass
|
||||
Reference in New Issue
Block a user