feat: add unified provider architecture with Amazon Bedrock support
Refactored LLM provider infrastructure to support sustainable additions of new providers with both embedding and text generation capabilities.
## Major Changes
### Unified Provider Architecture (ADR-015)
- Created `nextcloud_mcp_server/providers/` with unified Provider ABC
- Providers now support optional capabilities (embeddings and/or generation)
- Auto-detection registry with priority: Bedrock → Ollama → Simple
- Backward compatible - existing code continues to work
### New Providers
- **BedrockProvider**: Full Amazon Bedrock integration
- Embeddings: Titan Embed, Cohere Embed models
- Generation: Claude, Llama, Titan Text, Mistral models
- Model-specific request/response handling
- AWS credential chain integration
- **OllamaProvider**: Migrated with both capabilities support
- **AnthropicProvider**: Moved from test code to production providers
- **SimpleProvider**: Migrated in-memory fallback provider
### Breaking Changes
None - full backward compatibility maintained:
- `embedding.get_embedding_service()` still works
- RAG evaluation tests updated to use unified providers
- All existing tests pass (127 unit tests)
### Testing
- Added 9 comprehensive Bedrock unit tests with mocked boto3
- All existing unit tests pass
- Type checking (ty) and linting (ruff) pass
- Verified backward compatibility
### Documentation
- `docs/ADR-015-unified-provider-architecture.md`: Comprehensive ADR
- `docs/bedrock-setup.md`: AWS setup guide with IAM permissions
- `CLAUDE.md`: Updated with provider architecture section
### Dependencies
- Added `boto3>=1.35.0` to dev dependencies (optional)
## Environment Variables
### Bedrock
- `AWS_REGION`: AWS region (e.g., "us-east-1")
- `BEDROCK_EMBEDDING_MODEL`: Model ID for embeddings
- `BEDROCK_GENERATION_MODEL`: Model ID for generation
- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`: Optional credentials
### Ollama
- `OLLAMA_BASE_URL`: API URL
- `OLLAMA_EMBEDDING_MODEL`: Embedding model (default: "nomic-embed-text")
- `OLLAMA_GENERATION_MODEL`: Generation model
## AWS Bedrock Permissions Required
Minimal IAM policy:
```json
{
"Effect": "Allow",
"Action": ["bedrock:InvokeModel"],
"Resource": ["arn:aws:bedrock:*::foundation-model/*"]
}
```
See `docs/bedrock-setup.md` for detailed setup instructions.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,99 +1,20 @@
|
||||
"""LLM provider abstraction for RAG evaluation.
|
||||
|
||||
Supports Ollama (local) and Anthropic (cloud) providers for both ground truth
|
||||
DEPRECATED: This module is maintained for backward compatibility with RAG evaluation tests.
|
||||
New code should use nextcloud_mcp_server.providers directly.
|
||||
|
||||
Supports Ollama (local), Anthropic (cloud), and Bedrock (AWS) providers for both ground truth
|
||||
generation and evaluation.
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import Protocol
|
||||
|
||||
import httpx
|
||||
from anthropic import AsyncAnthropic
|
||||
|
||||
|
||||
class LLMProvider(Protocol):
|
||||
"""Protocol for LLM providers."""
|
||||
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""Generate text from a prompt.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
"""
|
||||
...
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the provider and release resources."""
|
||||
...
|
||||
|
||||
|
||||
class OllamaProvider:
|
||||
"""Ollama provider for local LLM inference."""
|
||||
|
||||
def __init__(self, base_url: str, model: str):
|
||||
"""Initialize Ollama provider.
|
||||
|
||||
Args:
|
||||
base_url: Ollama API base URL (e.g., http://localhost:11434)
|
||||
model: Model name (e.g., llama3.1:8b)
|
||||
"""
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.model = model
|
||||
self.client = httpx.AsyncClient(timeout=600.0) # 10 min timeout for generation
|
||||
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""Generate text using Ollama API."""
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/generate",
|
||||
json={
|
||||
"model": self.model,
|
||||
"prompt": prompt,
|
||||
"stream": False,
|
||||
"options": {
|
||||
"num_predict": max_tokens,
|
||||
"temperature": 0.7,
|
||||
},
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data["response"]
|
||||
|
||||
async def close(self):
|
||||
"""Close the HTTP client."""
|
||||
await self.client.aclose()
|
||||
|
||||
|
||||
class AnthropicProvider:
|
||||
"""Anthropic provider for cloud LLM inference."""
|
||||
|
||||
def __init__(self, api_key: str, model: str):
|
||||
"""Initialize Anthropic provider.
|
||||
|
||||
Args:
|
||||
api_key: Anthropic API key
|
||||
model: Model name (e.g., claude-3-5-sonnet-20241022)
|
||||
"""
|
||||
self.client = AsyncAnthropic(api_key=api_key)
|
||||
self.model = model
|
||||
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""Generate text using Anthropic API."""
|
||||
message = await self.client.messages.create(
|
||||
model=self.model,
|
||||
max_tokens=max_tokens,
|
||||
temperature=0.7,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
)
|
||||
return message.content[0].text
|
||||
|
||||
async def close(self):
|
||||
"""Close the client (no-op for Anthropic)."""
|
||||
pass
|
||||
from nextcloud_mcp_server.providers import (
|
||||
AnthropicProvider,
|
||||
BedrockProvider,
|
||||
OllamaProvider,
|
||||
Provider,
|
||||
)
|
||||
|
||||
|
||||
def create_llm_provider(
|
||||
@@ -102,18 +23,24 @@ def create_llm_provider(
|
||||
ollama_model: str | None = None,
|
||||
anthropic_api_key: str | None = None,
|
||||
anthropic_model: str | None = None,
|
||||
) -> LLMProvider:
|
||||
bedrock_region: str | None = None,
|
||||
bedrock_model: str | None = None,
|
||||
) -> Provider:
|
||||
"""Create an LLM provider from environment variables or arguments.
|
||||
|
||||
Args:
|
||||
provider: Provider type ('ollama' or 'anthropic'). Defaults to RAG_EVAL_PROVIDER env var or 'ollama'
|
||||
provider: Provider type ('ollama', 'anthropic', or 'bedrock').
|
||||
Defaults to RAG_EVAL_PROVIDER env var or 'ollama'
|
||||
ollama_base_url: Ollama base URL. Defaults to RAG_EVAL_OLLAMA_BASE_URL or 'http://localhost:11434'
|
||||
ollama_model: Ollama model. Defaults to RAG_EVAL_OLLAMA_MODEL or 'llama3.1:8b'
|
||||
ollama_model: Ollama model. Defaults to RAG_EVAL_OLLAMA_MODEL or 'llama3.2:1b'
|
||||
anthropic_api_key: Anthropic API key. Defaults to RAG_EVAL_ANTHROPIC_API_KEY env var
|
||||
anthropic_model: Anthropic model. Defaults to RAG_EVAL_ANTHROPIC_MODEL or 'claude-3-5-sonnet-20241022'
|
||||
bedrock_region: AWS region. Defaults to RAG_EVAL_BEDROCK_REGION or AWS_REGION env var
|
||||
bedrock_model: Bedrock model ID. Defaults to RAG_EVAL_BEDROCK_MODEL or
|
||||
'anthropic.claude-3-sonnet-20240229-v1:0'
|
||||
|
||||
Returns:
|
||||
LLMProvider instance
|
||||
Provider instance
|
||||
|
||||
Raises:
|
||||
ValueError: If provider is invalid or required credentials are missing
|
||||
@@ -130,7 +57,9 @@ def create_llm_provider(
|
||||
or "http://localhost:11434"
|
||||
)
|
||||
model = ollama_model or os.environ.get("RAG_EVAL_OLLAMA_MODEL", "llama3.2:1b")
|
||||
return OllamaProvider(base_url=base_url, model=model)
|
||||
return OllamaProvider(
|
||||
base_url=base_url, embedding_model=None, generation_model=model
|
||||
)
|
||||
|
||||
elif provider == "anthropic":
|
||||
api_key = anthropic_api_key or os.environ.get("RAG_EVAL_ANTHROPIC_API_KEY")
|
||||
@@ -143,7 +72,18 @@ def create_llm_provider(
|
||||
)
|
||||
return AnthropicProvider(api_key=api_key, model=model)
|
||||
|
||||
elif provider == "bedrock":
|
||||
region = bedrock_region or os.environ.get(
|
||||
"RAG_EVAL_BEDROCK_REGION", os.environ.get("AWS_REGION", "us-east-1")
|
||||
)
|
||||
model = bedrock_model or os.environ.get(
|
||||
"RAG_EVAL_BEDROCK_MODEL", "anthropic.claude-3-sonnet-20240229-v1:0"
|
||||
)
|
||||
return BedrockProvider(
|
||||
region_name=region, embedding_model=None, generation_model=model
|
||||
)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Invalid provider: {provider}. Must be 'ollama' or 'anthropic'."
|
||||
f"Invalid provider: {provider}. Must be 'ollama', 'anthropic', or 'bedrock'."
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user