feat: add unified provider architecture with Amazon Bedrock support

Refactored LLM provider infrastructure to support sustainable additions of new providers with both embedding and text generation capabilities.

## Major Changes

### Unified Provider Architecture (ADR-015)

- Created `nextcloud_mcp_server/providers/` with unified Provider ABC
- Providers now support optional capabilities (embeddings and/or generation)
- Auto-detection registry with priority: Bedrock → Ollama → Simple
- Backward compatible - existing code continues to work

### New Providers

- **BedrockProvider**: Full Amazon Bedrock integration
  - Embeddings: Titan Embed, Cohere Embed models
  - Generation: Claude, Llama, Titan Text, Mistral models
  - Model-specific request/response handling
  - AWS credential chain integration
- **OllamaProvider**: Migrated with both capabilities support
- **AnthropicProvider**: Moved from test code to production providers
- **SimpleProvider**: Migrated in-memory fallback provider

### Breaking Changes

None - full backward compatibility maintained:

- `embedding.get_embedding_service()` still works
- RAG evaluation tests updated to use unified providers
- All existing tests pass (127 unit tests)

### Testing

- Added 9 comprehensive Bedrock unit tests with mocked boto3
- All existing unit tests pass
- Type checking (ty) and linting (ruff) pass
- Verified backward compatibility

### Documentation

- `docs/ADR-015-unified-provider-architecture.md`: Comprehensive ADR
- `docs/bedrock-setup.md`: AWS setup guide with IAM permissions
- `CLAUDE.md`: Updated with provider architecture section

### Dependencies

- Added `boto3>=1.35.0` to dev dependencies (optional)

## Environment Variables

### Bedrock

- `AWS_REGION`: AWS region (e.g., "us-east-1")
- `BEDROCK_EMBEDDING_MODEL`: Model ID for embeddings
- `BEDROCK_GENERATION_MODEL`: Model ID for generation
- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`: Optional credentials

### Ollama

- `OLLAMA_BASE_URL`: API URL
- `OLLAMA_EMBEDDING_MODEL`: Embedding model (default: "nomic-embed-text")
- `OLLAMA_GENERATION_MODEL`: Generation model

## AWS Bedrock Permissions Required

Minimal IAM policy:

```json
{
  "Effect": "Allow",
  "Action": ["bedrock:InvokeModel"],
  "Resource": ["arn:aws:bedrock:*::foundation-model/*"]
}
```

See `docs/bedrock-setup.md` for detailed setup instructions.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 11:36:58 +01:00
parent f559ca049e
commit 5b484c9226
16 changed files with 2252 additions and 137 deletions
@@ -1,99 +1,20 @@
 """LLM provider abstraction for RAG evaluation.

-Supports Ollama (local) and Anthropic (cloud) providers for both ground truth
+DEPRECATED: This module is maintained for backward compatibility with RAG evaluation tests.
+New code should use nextcloud_mcp_server.providers directly.
+
+Supports Ollama (local), Anthropic (cloud), and Bedrock (AWS) providers for both ground truth
 generation and evaluation.
 """

 import os
-from typing import Protocol

-import httpx
-from anthropic import AsyncAnthropic
-
-
-class LLMProvider(Protocol):
-    """Protocol for LLM providers."""
-
-    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
-        """Generate text from a prompt.
-
-        Args:
-            prompt: The prompt to generate from
-            max_tokens: Maximum tokens to generate
-
-        Returns:
-            Generated text
-        """
-        ...
-
-    async def close(self) -> None:
-        """Close the provider and release resources."""
-        ...
-
-
-class OllamaProvider:
-    """Ollama provider for local LLM inference."""
-
-    def __init__(self, base_url: str, model: str):
-        """Initialize Ollama provider.
-
-        Args:
-            base_url: Ollama API base URL (e.g., http://localhost:11434)
-            model: Model name (e.g., llama3.1:8b)
-        """
-        self.base_url = base_url.rstrip("/")
-        self.model = model
-        self.client = httpx.AsyncClient(timeout=600.0)  # 10 min timeout for generation
-
-    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
-        """Generate text using Ollama API."""
-        response = await self.client.post(
-            f"{self.base_url}/api/generate",
-            json={
-                "model": self.model,
-                "prompt": prompt,
-                "stream": False,
-                "options": {
-                    "num_predict": max_tokens,
-                    "temperature": 0.7,
-                },
-            },
-        )
-        response.raise_for_status()
-        data = response.json()
-        return data["response"]
-
-    async def close(self):
-        """Close the HTTP client."""
-        await self.client.aclose()
-
-
-class AnthropicProvider:
-    """Anthropic provider for cloud LLM inference."""
-
-    def __init__(self, api_key: str, model: str):
-        """Initialize Anthropic provider.
-
-        Args:
-            api_key: Anthropic API key
-            model: Model name (e.g., claude-3-5-sonnet-20241022)
-        """
-        self.client = AsyncAnthropic(api_key=api_key)
-        self.model = model
-
-    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
-        """Generate text using Anthropic API."""
-        message = await self.client.messages.create(
-            model=self.model,
-            max_tokens=max_tokens,
-            temperature=0.7,
-            messages=[{"role": "user", "content": prompt}],
-        )
-        return message.content[0].text
-
-    async def close(self):
-        """Close the client (no-op for Anthropic)."""
-        pass
+from nextcloud_mcp_server.providers import (
+    AnthropicProvider,
+    BedrockProvider,
+    OllamaProvider,
+    Provider,
+)


 def create_llm_provider(
@@ -102,18 +23,24 @@ def create_llm_provider(
    ollama_model: str | None = None,
    anthropic_api_key: str | None = None,
    anthropic_model: str | None = None,
-) -> LLMProvider:
+    bedrock_region: str | None = None,
+    bedrock_model: str | None = None,
+) -> Provider:
    """Create an LLM provider from environment variables or arguments.

    Args:
-        provider: Provider type ('ollama' or 'anthropic'). Defaults to RAG_EVAL_PROVIDER env var or 'ollama'
+        provider: Provider type ('ollama', 'anthropic', or 'bedrock').
+            Defaults to RAG_EVAL_PROVIDER env var or 'ollama'
        ollama_base_url: Ollama base URL. Defaults to RAG_EVAL_OLLAMA_BASE_URL or 'http://localhost:11434'
-        ollama_model: Ollama model. Defaults to RAG_EVAL_OLLAMA_MODEL or 'llama3.1:8b'
+        ollama_model: Ollama model. Defaults to RAG_EVAL_OLLAMA_MODEL or 'llama3.2:1b'
        anthropic_api_key: Anthropic API key. Defaults to RAG_EVAL_ANTHROPIC_API_KEY env var
        anthropic_model: Anthropic model. Defaults to RAG_EVAL_ANTHROPIC_MODEL or 'claude-3-5-sonnet-20241022'
+        bedrock_region: AWS region. Defaults to RAG_EVAL_BEDROCK_REGION, then AWS_REGION, then 'us-east-1'
+        bedrock_model: Bedrock model ID. Defaults to RAG_EVAL_BEDROCK_MODEL or
+            'anthropic.claude-3-sonnet-20240229-v1:0'

    Returns:
-        LLMProvider instance
+        Provider instance

    Raises:
        ValueError: If provider is invalid or required credentials are missing
@@ -130,7 +57,9 @@ def create_llm_provider(
            or "http://localhost:11434"
        )
        model = ollama_model or os.environ.get("RAG_EVAL_OLLAMA_MODEL", "llama3.2:1b")
-        return OllamaProvider(base_url=base_url, model=model)
+        return OllamaProvider(
+            base_url=base_url, embedding_model=None, generation_model=model
+        )

    elif provider == "anthropic":
        api_key = anthropic_api_key or os.environ.get("RAG_EVAL_ANTHROPIC_API_KEY")
@@ -143,7 +72,18 @@ def create_llm_provider(
        )
        return AnthropicProvider(api_key=api_key, model=model)

+    elif provider == "bedrock":
+        region = bedrock_region or os.environ.get(
+            "RAG_EVAL_BEDROCK_REGION", os.environ.get("AWS_REGION", "us-east-1")
+        )
+        model = bedrock_model or os.environ.get(
+            "RAG_EVAL_BEDROCK_MODEL", "anthropic.claude-3-sonnet-20240229-v1:0"
+        )
+        return BedrockProvider(
+            region_name=region, embedding_model=None, generation_model=model
+        )
+
    else:
        raise ValueError(
-            f"Invalid provider: {provider}. Must be 'ollama' or 'anthropic'."
+            f"Invalid provider: {provider}. Must be 'ollama', 'anthropic', or 'bedrock'."
        )