diff --git a/.github/workflows/rag-evaluation.yml b/.github/workflows/rag-evaluation.yml index a5446b5..ff479c0 100644 --- a/.github/workflows/rag-evaluation.yml +++ b/.github/workflows/rag-evaluation.yml @@ -25,21 +25,6 @@ jobs: steps: - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0 - with: - submodules: 'true' - - ###### Required to build OIDC App ###### - - name: Set up php 8.4 - uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2 - with: - php-version: 8.4 - coverage: none - - - name: Install OIDC app composer dependencies - run: | - cd third_party/oidc - composer install --no-dev - ###### Required to build OIDC App ###### - name: Run docker compose with vector sync uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1 @@ -101,7 +86,7 @@ jobs: OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }} OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }} run: | - uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO + uv run pytest tests/integration/test_rag.py -v --log-cli-level=INFO --provider openai - name: Upload test results if: always() diff --git a/nextcloud_mcp_server/providers/anthropic.py b/nextcloud_mcp_server/providers/anthropic.py index dc71fa3..947abbb 100644 --- a/nextcloud_mcp_server/providers/anthropic.py +++ b/nextcloud_mcp_server/providers/anthropic.py @@ -17,18 +17,20 @@ class AnthropicProvider(Provider): Note: Anthropic doesn't provide embedding models, only text generation. """ - def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"): + def __init__( + self, api_key: str, generation_model: str = "claude-3-5-sonnet-20241022" + ): """ Initialize Anthropic provider.
Args: api_key: Anthropic API key - model: Model name (e.g., "claude-3-5-sonnet-20241022") + generation_model: Model name (e.g., "claude-3-5-sonnet-20241022") """ self.client = AsyncAnthropic(api_key=api_key) - self.model = model + self.model = generation_model - logger.info(f"Initialized Anthropic provider (model={model})") + logger.info(f"Initialized Anthropic provider (model={self.model})") @property def supports_embeddings(self) -> bool: diff --git a/nextcloud_mcp_server/server/semantic.py b/nextcloud_mcp_server/server/semantic.py index 3d88369..f3114a5 100644 --- a/nextcloud_mcp_server/server/semantic.py +++ b/nextcloud_mcp_server/server/semantic.py @@ -499,9 +499,11 @@ def configure_semantic_tools(mcp: FastMCP): ) # 6. Request LLM completion via MCP sampling with timeout + # Note: 5 minute timeout to accommodate slower local LLMs (e.g., Ollama) + sampling_timeout_seconds = 300 try: - with anyio.fail_after(30): + with anyio.fail_after(sampling_timeout_seconds): sampling_result = await ctx.session.create_message( messages=[ SamplingMessage( @@ -548,14 +550,14 @@ def configure_semantic_tools(mcp: FastMCP): except TimeoutError: logger.warning( - f"Sampling request timed out after 30 seconds for query: '{query}', " + f"Sampling request timed out after {sampling_timeout_seconds} seconds for query: '{query}', " f"returning search results only" ) return SamplingSearchResponse( query=query, generated_answer=( f"[Sampling request timed out]\n\n" - f"The answer generation took too long (>30s). " + f"The answer generation took too long (>{sampling_timeout_seconds}s). " f"Found {len(accessible_results)} relevant documents. " f"Please review the sources below or try a simpler query." 
), @@ -675,15 +677,22 @@ def configure_semantic_tools(mcp: FastMCP): # Get Qdrant client and query indexed count indexed_count = 0 try: + from qdrant_client.models import Filter + from nextcloud_mcp_server.config import get_settings + from nextcloud_mcp_server.vector.placeholder import ( + get_placeholder_filter, + ) from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client settings = get_settings() qdrant_client = await get_qdrant_client() - # Count documents in collection + # Count documents in collection, excluding placeholders + # Placeholders are zero-vector points used to track processing state count_result = await qdrant_client.count( - collection_name=settings.get_collection_name() + collection_name=settings.get_collection_name(), + count_filter=Filter(must=[get_placeholder_filter()]), ) indexed_count = count_result.count diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py new file mode 100644 index 0000000..de3051b --- /dev/null +++ b/tests/integration/conftest.py @@ -0,0 +1,26 @@ +"""Pytest configuration for integration tests. + +This conftest.py provides hooks and fixtures specific to integration tests, +including the --provider flag for RAG tests. 
+""" + +# Valid provider names +VALID_PROVIDERS = ["openai", "ollama", "anthropic", "bedrock"] + + +def pytest_addoption(parser): + """Add --provider command line option for RAG tests.""" + parser.addoption( + "--provider", + action="store", + default=None, + choices=VALID_PROVIDERS, + help="LLM provider for RAG tests: openai, ollama, anthropic, bedrock", + ) + + +def pytest_configure(config): + """Configure custom markers.""" + config.addinivalue_line( + "markers", "rag: mark test as RAG integration test (requires --provider flag)" + ) diff --git a/tests/integration/provider_fixtures.py b/tests/integration/provider_fixtures.py new file mode 100644 index 0000000..f8d90ec --- /dev/null +++ b/tests/integration/provider_fixtures.py @@ -0,0 +1,264 @@ +"""Provider fixtures for integration tests. + +This module provides pytest fixtures that configure LLM providers based on +an explicit --provider flag. Supports OpenAI, Ollama, Anthropic, and Bedrock. + +Usage: + pytest tests/integration/test_rag.py --provider=openai + pytest tests/integration/test_rag.py --provider=ollama + pytest tests/integration/test_rag.py --provider=anthropic + pytest tests/integration/test_rag.py --provider=bedrock + +Environment Variables by Provider: + +OpenAI: + OPENAI_API_KEY: API key (required) + OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference") + OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small") + OPENAI_GENERATION_MODEL: Generation model (default: "gpt-4o-mini") + +Ollama: + OLLAMA_BASE_URL: API URL (required, e.g., "http://localhost:11434") + OLLAMA_EMBEDDING_MODEL: Embedding model (default: "nomic-embed-text") + OLLAMA_GENERATION_MODEL: Generation model (default: "llama3.2:1b") + +Anthropic: + ANTHROPIC_API_KEY: API key (required) + ANTHROPIC_GENERATION_MODEL: Model (default: "claude-3-haiku-20240307") + +Bedrock: + AWS_REGION: AWS region (required) + BEDROCK_EMBEDDING_MODEL: Embedding model ID + BEDROCK_GENERATION_MODEL: Generation 
model ID +""" + +import logging +import os +from typing import AsyncGenerator + +import pytest + +from nextcloud_mcp_server.providers.base import Provider + +logger = logging.getLogger(__name__) + +# Valid provider names (must match conftest.py) +VALID_PROVIDERS = ["openai", "ollama", "anthropic", "bedrock"] + + +async def create_generation_provider(provider_name: str) -> Provider: + """Create a provider configured for text generation. + + Args: + provider_name: One of "openai", "ollama", "anthropic", "bedrock" + + Returns: + Provider instance configured for generation + + Raises: + ValueError: If provider_name is invalid or required env vars missing + """ + if provider_name == "openai": + from nextcloud_mcp_server.providers.openai import OpenAIProvider + + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + raise ValueError("OPENAI_API_KEY environment variable required") + + base_url = os.getenv("OPENAI_BASE_URL") + generation_model = os.getenv("OPENAI_GENERATION_MODEL", "gpt-4o-mini") + + # GitHub Models API requires model name prefix + if base_url and "models.github.ai" in base_url: + if not generation_model.startswith("openai/"): + generation_model = f"openai/{generation_model}" + + provider = OpenAIProvider( + api_key=api_key, + base_url=base_url, + embedding_model=None, # Generation only + generation_model=generation_model, + ) + logger.info(f"Created OpenAI generation provider: model={generation_model}") + return provider + + elif provider_name == "ollama": + from nextcloud_mcp_server.providers.ollama import OllamaProvider + + base_url = os.getenv("OLLAMA_BASE_URL") + if not base_url: + raise ValueError("OLLAMA_BASE_URL environment variable required") + + generation_model = os.getenv("OLLAMA_GENERATION_MODEL", "llama3.2:1b") + + provider = OllamaProvider( + base_url=base_url, + embedding_model=None, # Generation only + generation_model=generation_model, + ) + logger.info(f"Created Ollama generation provider: model={generation_model}") + return provider 
+ + elif provider_name == "anthropic": + from nextcloud_mcp_server.providers.anthropic import AnthropicProvider + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + raise ValueError("ANTHROPIC_API_KEY environment variable required") + + generation_model = os.getenv( + "ANTHROPIC_GENERATION_MODEL", "claude-3-haiku-20240307" + ) + + provider = AnthropicProvider( + api_key=api_key, + generation_model=generation_model, + ) + logger.info(f"Created Anthropic generation provider: model={generation_model}") + return provider + + elif provider_name == "bedrock": + from nextcloud_mcp_server.providers.bedrock import BedrockProvider + + region = os.getenv("AWS_REGION") + if not region: + raise ValueError("AWS_REGION environment variable required") + + generation_model = os.getenv("BEDROCK_GENERATION_MODEL") + if not generation_model: + raise ValueError("BEDROCK_GENERATION_MODEL environment variable required") + + provider = BedrockProvider( + region=region, + embedding_model=None, # Generation only + generation_model=generation_model, + ) + logger.info(f"Created Bedrock generation provider: model={generation_model}") + return provider + + else: + raise ValueError(f"Unknown provider: {provider_name}. Valid: {VALID_PROVIDERS}") + + +async def create_embedding_provider(provider_name: str) -> Provider: + """Create a provider configured for embeddings. 
+ + Args: + provider_name: One of "openai", "ollama", "bedrock" + (Anthropic does not support embeddings) + + Returns: + Provider instance configured for embeddings + + Raises: + ValueError: If provider_name is invalid, doesn't support embeddings, + or required env vars missing + """ + if provider_name == "anthropic": + raise ValueError("Anthropic does not support embeddings") + + if provider_name == "openai": + from nextcloud_mcp_server.providers.openai import OpenAIProvider + + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + raise ValueError("OPENAI_API_KEY environment variable required") + + base_url = os.getenv("OPENAI_BASE_URL") + embedding_model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small") + + # GitHub Models API requires model name prefix + if base_url and "models.github.ai" in base_url: + if not embedding_model.startswith("openai/"): + embedding_model = f"openai/{embedding_model}" + + provider = OpenAIProvider( + api_key=api_key, + base_url=base_url, + embedding_model=embedding_model, + generation_model=None, # Embeddings only + ) + logger.info(f"Created OpenAI embedding provider: model={embedding_model}") + return provider + + elif provider_name == "ollama": + from nextcloud_mcp_server.providers.ollama import OllamaProvider + + base_url = os.getenv("OLLAMA_BASE_URL") + if not base_url: + raise ValueError("OLLAMA_BASE_URL environment variable required") + + embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text") + + provider = OllamaProvider( + base_url=base_url, + embedding_model=embedding_model, + generation_model=None, # Embeddings only + ) + logger.info(f"Created Ollama embedding provider: model={embedding_model}") + return provider + + elif provider_name == "bedrock": + from nextcloud_mcp_server.providers.bedrock import BedrockProvider + + region = os.getenv("AWS_REGION") + if not region: + raise ValueError("AWS_REGION environment variable required") + + embedding_model = 
os.getenv("BEDROCK_EMBEDDING_MODEL") + if not embedding_model: + raise ValueError("BEDROCK_EMBEDDING_MODEL environment variable required") + + provider = BedrockProvider( + region=region, + embedding_model=embedding_model, + generation_model=None, # Embeddings only + ) + logger.info(f"Created Bedrock embedding provider: model={embedding_model}") + return provider + + else: + raise ValueError(f"Unknown provider: {provider_name}. Valid: {VALID_PROVIDERS}") + + +# ============================================================================= +# Pytest Fixtures +# ============================================================================= + + +@pytest.fixture(scope="module") +def provider_name(request) -> str: + """Get the provider name from --provider flag. + + Skips if --provider is not specified or the option is not registered. + """ + name = request.config.getoption("--provider", default=None) + if not name: + pytest.skip("--provider flag required (openai, ollama, anthropic, bedrock)") + return name + + +@pytest.fixture(scope="module") +async def generation_provider(provider_name: str) -> AsyncGenerator[Provider, None]: + """Fixture providing a generation-capable provider. + + Requires --provider flag to be set. + """ + provider = await create_generation_provider(provider_name) + yield provider + await provider.close() + + +@pytest.fixture(scope="module") +async def embedding_provider(provider_name: str) -> AsyncGenerator[Provider, None]: + """Fixture providing an embedding-capable provider. + + Requires --provider flag to be set. + Note: Anthropic does not support embeddings - the test is skipped for it.
+ """ + if provider_name == "anthropic": + pytest.skip("Anthropic does not support embeddings") + + provider = await create_embedding_provider(provider_name) + yield provider + await provider.close() diff --git a/tests/integration/sampling_support.py b/tests/integration/sampling_support.py index baf74cf..4dae699 100644 --- a/tests/integration/sampling_support.py +++ b/tests/integration/sampling_support.py @@ -1,7 +1,7 @@ """MCP sampling support for integration tests. This module provides utilities to enable real LLM-based sampling in integration tests -using OpenAI or GitHub Models API. +using any provider that supports text generation (OpenAI, Ollama, Anthropic, Bedrock). """ import logging @@ -10,46 +10,58 @@ from typing import Any from mcp import types from mcp.client.session import ClientSession, RequestContext -from nextcloud_mcp_server.providers.openai import OpenAIProvider +from nextcloud_mcp_server.providers.base import Provider logger = logging.getLogger(__name__) -def create_openai_sampling_callback(provider: OpenAIProvider): - """Factory to create a sampling callback using OpenAI provider. +def create_sampling_callback(provider: Provider): + """Factory to create a sampling callback using any generation-capable provider. The callback conforms to MCP's SamplingFnT protocol and can be passed to ClientSession for handling sampling requests from the server. 
Args: - provider: OpenAIProvider instance configured with a generation model + provider: Any Provider instance that supports generation + (supports_generation=True) Returns: Async callback function for MCP sampling + Raises: + ValueError: If provider doesn't support generation + Example: ```python - provider = OpenAIProvider( - api_key=os.getenv("OPENAI_API_KEY"), - base_url=os.getenv("OPENAI_BASE_URL"), - generation_model="gpt-4o-mini", - ) - callback = create_openai_sampling_callback(provider) + from nextcloud_mcp_server.providers import get_provider - async for session in create_mcp_client_session( - url="http://localhost:8000/mcp", - sampling_callback=callback, - ): - # Session now supports sampling - pass + provider = get_provider() # Auto-detect from environment + if provider.supports_generation: + callback = create_sampling_callback(provider) + + async for session in create_mcp_client_session( + url="http://localhost:8000/mcp", + sampling_callback=callback, + ): + # Session now supports sampling + pass ``` """ + if not provider.supports_generation: + raise ValueError( + f"Provider {provider.__class__.__name__} does not support generation" + ) + + # Model name for logging; the attribute name is provider-specific + model_name = getattr(provider, "generation_model", None) + model_name = model_name or getattr(provider, "model", None) + model_name = model_name or provider.__class__.__name__ async def sampling_callback( context: RequestContext[ClientSession, Any], params: types.CreateMessageRequestParams, ) -> types.CreateMessageResult | types.ErrorData: - """Handle sampling requests using OpenAI provider.""" + """Handle sampling requests using the configured provider.""" logger.debug(f"Sampling callback invoked with {len(params.messages)} messages") # Extract messages and build prompt @@ -68,14 +80,13 @@ def create_openai_sampling_callback(provider: OpenAIProvider): logger.debug(f"Generating response for prompt ({len(prompt)} chars)") try: - # Generate response using OpenAI provider - # Note: temperature is hardcoded in the provider at 0.7 + #
Generate response using provider + # Note: temperature is typically hardcoded in providers at 0.7 response = await provider.generate( prompt=prompt, max_tokens=params.maxTokens, ) - model_name = provider.generation_model or "unknown" logger.info(f"Sampling completed: {len(response)} chars from {model_name}") return types.CreateMessageResult( @@ -85,10 +96,25 @@ def create_openai_sampling_callback(provider: OpenAIProvider): stopReason="endTurn", ) except Exception as e: - logger.error(f"OpenAI generation failed: {e}") + logger.error(f"Generation failed ({provider.__class__.__name__}): {e}") return types.ErrorData( code=types.INTERNAL_ERROR, - message=f"OpenAI generation failed: {e!s}", + message=f"Generation failed: {e!s}", ) return sampling_callback + + +def create_openai_sampling_callback(provider: "Provider"): + """Factory to create a sampling callback using OpenAI provider. + + This is a backward-compatible wrapper around create_sampling_callback(). + Prefer using create_sampling_callback() directly for new code. + + Args: + provider: OpenAIProvider instance configured with a generation model + + Returns: + Async callback function for MCP sampling + """ + return create_sampling_callback(provider) diff --git a/tests/integration/test_rag_openai.py b/tests/integration/test_rag.py similarity index 76% rename from tests/integration/test_rag_openai.py rename to tests/integration/test_rag.py index 1f750fc..a9ba160 100644 --- a/tests/integration/test_rag_openai.py +++ b/tests/integration/test_rag.py @@ -1,26 +1,33 @@ -"""Integration tests for RAG pipeline with OpenAI/GitHub Models API. +"""Integration tests for RAG pipeline with multiple LLM providers. These tests validate the complete semantic search and MCP sampling flow using: -1. OpenAI embeddings for semantic search -2. MCP sampling for answer generation +1. MCP server's built-in semantic search (embeddings handled server-side) +2. MCP sampling for answer generation (any generation-capable provider) 3. 
Pre-indexed Nextcloud User Manual as the knowledge base -Environment Variables: - OPENAI_API_KEY: OpenAI API key or GitHub token for models.github.ai - OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference") - OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small") - OPENAI_GENERATION_MODEL: Generation model for sampling (default: "gpt-4o-mini") - RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud_User_Manual.pdf") +Usage: + # Run with OpenAI (including GitHub Models API) + OPENAI_API_KEY=... pytest tests/integration/test_rag.py --provider=openai -v -For GitHub CI, set: - OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }} - OPENAI_BASE_URL: https://models.github.ai/inference - OPENAI_EMBEDDING_MODEL: openai/text-embedding-3-small - OPENAI_GENERATION_MODEL: openai/gpt-4o-mini + # Run with Ollama + OLLAMA_BASE_URL=http://localhost:11434 OLLAMA_GENERATION_MODEL=llama3.2:1b \\ + pytest tests/integration/test_rag.py --provider=ollama -v + + # Run with Anthropic + ANTHROPIC_API_KEY=... pytest tests/integration/test_rag.py --provider=anthropic -v + + # Run with AWS Bedrock + AWS_REGION=us-east-1 BEDROCK_GENERATION_MODEL=... \\ + pytest tests/integration/test_rag.py --provider=bedrock -v + +Environment Variables: + See tests/integration/provider_fixtures.py for provider-specific configuration. 
+ RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud Manual.pdf") Prerequisites: - Nextcloud User Manual PDF uploaded to Nextcloud - VECTOR_SYNC_ENABLED=true on the MCP server + - Provider-specific environment variables set """ import json @@ -33,9 +40,10 @@ import anyio import pytest from mcp import ClientSession -from nextcloud_mcp_server.providers.openai import OpenAIProvider +from nextcloud_mcp_server.providers.base import Provider from tests.conftest import create_mcp_client_session -from tests.integration.sampling_support import create_openai_sampling_callback +from tests.integration.provider_fixtures import create_generation_provider +from tests.integration.sampling_support import create_sampling_callback logger = logging.getLogger(__name__) @@ -44,14 +52,14 @@ DEFAULT_MANUAL_PATH = "Nextcloud Manual.pdf" async def llm_judge( - provider: "OpenAIProvider", + provider: Provider, ground_truth: str, system_output: str, ) -> bool: """Use LLM to judge if system output aligns with ground truth. Args: - provider: OpenAI provider with generation capability + provider: Any provider with generation capability ground_truth: The expected/reference answer system_output: The system's actual output to evaluate @@ -66,17 +74,18 @@ Does the system output contain the key facts from the ground truth? 
Answer: TRUE or FALSE""" + logger.info("Received ground truth: %s", ground_truth) + logger.info("Received system output: %s", system_output) + response = await provider.generate(prompt, max_tokens=10) + logger.info("LLM Judge response: %s", response) return "TRUE" in response.upper() -# Skip all tests if OpenAI API key not configured +# Mark all tests as integration and RAG tests (RAG tests need --provider) pytestmark = [ pytest.mark.integration, - pytest.mark.skipif( - not os.getenv("OPENAI_API_KEY"), - reason="OPENAI_API_KEY not set - skipping OpenAI RAG tests", - ), + pytest.mark.rag, ] # Ground truth fixture path @@ -175,78 +184,49 @@ async def indexed_manual_pdf(nc_client, nc_mcp_client): @pytest.fixture(scope="module") -async def openai_provider(): - """OpenAI provider configured from environment (embeddings only).""" - api_key = os.getenv("OPENAI_API_KEY") - base_url = os.getenv("OPENAI_BASE_URL") - embedding_model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small") +def provider_name(request) -> str: + """Get the provider name from --provider flag. - provider = OpenAIProvider( - api_key=api_key, - base_url=base_url, - embedding_model=embedding_model, - generation_model=None, # Embeddings only - ) - - yield provider - await provider.close() + Skips if --provider is not specified or the option is not registered.
+ """ + name = request.config.getoption("--provider", default=None) + if not name: + pytest.skip("--provider flag required (openai, ollama, anthropic, bedrock)") + return name @pytest.fixture(scope="module") -async def openai_generation_provider(): - """OpenAI provider configured for text generation (for sampling callback).""" - api_key = os.getenv("OPENAI_API_KEY") - base_url = os.getenv("OPENAI_BASE_URL") - generation_model = os.getenv("OPENAI_GENERATION_MODEL", "gpt-4o-mini") - - # For GitHub Models API, use the prefixed model name - if base_url and "models.github.ai" in base_url: - if not generation_model.startswith("openai/"): - generation_model = f"openai/{generation_model}" - - provider = OpenAIProvider( - api_key=api_key, - base_url=base_url, - embedding_model=None, # Generation only - generation_model=generation_model, - ) +async def generation_provider(provider_name: str) -> AsyncGenerator[Provider, None]: + """Provider configured for text generation. + Requires --provider flag to be set. + """ + provider = await create_generation_provider(provider_name) yield provider await provider.close() @pytest.fixture(scope="module") async def nc_mcp_client_with_sampling( - anyio_backend, openai_generation_provider + anyio_backend, generation_provider, provider_name ) -> AsyncGenerator[ClientSession, Any]: - """MCP client with OpenAI-based sampling support. + """MCP client with sampling support using the specified provider. This fixture creates an MCP client that can handle sampling requests - from the server using OpenAI for text generation. + from the server using the configured generation provider.
""" - sampling_callback = create_openai_sampling_callback(openai_generation_provider) + sampling_callback = create_sampling_callback(generation_provider) async for session in create_mcp_client_session( url="http://localhost:8000/mcp", - client_name="OpenAI Sampling MCP", + client_name=f"Sampling MCP ({provider_name})", sampling_callback=sampling_callback, ): yield session -async def test_openai_embeddings_work(openai_provider: OpenAIProvider): - """Test that OpenAI embeddings can be generated.""" - embedding = await openai_provider.embed("test query about Nextcloud") - - assert isinstance(embedding, list) - assert len(embedding) > 0 - assert all(isinstance(x, float) for x in embedding) - # OpenAI embedding dimensions: 1536 (small) or 3072 (large) - assert len(embedding) in [1536, 3072] - - async def test_semantic_search_retrieval( - nc_mcp_client, ground_truth_qa, indexed_manual_pdf, openai_generation_provider + nc_mcp_client, ground_truth_qa, indexed_manual_pdf, generation_provider ): """Test that semantic search retrieves relevant documents from the manual. @@ -278,7 +258,7 @@ async def test_semantic_search_retrieval( # Use LLM judge to evaluate if excerpts are relevant to ground truth all_excerpts = " ".join([r["excerpt"] for r in data["results"]]) is_relevant = await llm_judge( - openai_generation_provider, + generation_provider, test_case["ground_truth"], all_excerpts, ) @@ -289,16 +269,16 @@ async def test_semantic_search_answer_with_sampling( nc_mcp_client_with_sampling, ground_truth_qa, indexed_manual_pdf, - openai_generation_provider, + generation_provider, ): """Test semantic search with MCP sampling for answer generation. This tests the full RAG pipeline: 1. Semantic search retrieves relevant documents 2. MCP sampling generates an answer from the retrieved context - 3. OpenAI generates the answer via the sampling callback + 3. 
Provider generates the answer via the sampling callback - Uses nc_mcp_client_with_sampling which has OpenAI-based sampling enabled. + Uses nc_mcp_client_with_sampling which has sampling enabled. """ # Use the 2FA question - has clear expected answer test_case = ground_truth_qa[0] @@ -348,7 +328,7 @@ async def test_semantic_search_answer_with_sampling( # Use LLM judge to evaluate answer relevance is_relevant = await llm_judge( - openai_generation_provider, + generation_provider, test_case["ground_truth"], data["generated_answer"], )