fix: Increase MCP sampling timeout to 5 minutes for slower LLMs

- Increase sampling timeout from 30s to 300s in semantic.py to accommodate
  slower local LLMs like Ollama
- Refactor RAG integration tests to support multiple providers (ollama,
  openai, anthropic, bedrock)
- Remove the embeddings-only OpenAI provider fixture and its embedding test
  from the RAG tests, since the MCP server handles embeddings internally
- Add --provider flag via tests/integration/conftest.py
- Add provider_fixtures.py with factory functions for generation providers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Chris Coutinho
2025-11-23 05:43:15 +01:00
parent f5764c01fc
commit 5c73b85f65
7 changed files with 416 additions and 124 deletions
+1 -16
View File
@@ -25,21 +25,6 @@ jobs:
steps:
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
with:
submodules: 'true'
###### Required to build OIDC App ######
- name: Set up php 8.4
uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
with:
php-version: 8.4
coverage: none
- name: Install OIDC app composer dependencies
run: |
cd third_party/oidc
composer install --no-dev
###### Required to build OIDC App ######
- name: Run docker compose with vector sync
uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
@@ -101,7 +86,7 @@ jobs:
OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
run: |
uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO
uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO --provider openai
- name: Upload test results
if: always()
+6 -4
View File
@@ -17,18 +17,20 @@ class AnthropicProvider(Provider):
Note: Anthropic doesn't provide embedding models, only text generation.
"""
def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"):
def __init__(
self, api_key: str, generation_model: str = "claude-3-5-sonnet-20241022"
):
"""
Initialize Anthropic provider.
Args:
api_key: Anthropic API key
model: Model name (e.g., "claude-3-5-sonnet-20241022")
generation_model: Model name (e.g., "claude-3-5-sonnet-20241022")
"""
self.client = AsyncAnthropic(api_key=api_key)
self.model = model
self.model = generation_model
logger.info(f"Initialized Anthropic provider (model={model})")
logger.info(f"Initialized Anthropic provider (model={self.model})")
@property
def supports_embeddings(self) -> bool:
+14 -5
View File
@@ -499,9 +499,11 @@ def configure_semantic_tools(mcp: FastMCP):
)
# 6. Request LLM completion via MCP sampling with timeout
# Note: 5 minute timeout to accommodate slower local LLMs (e.g., Ollama)
sampling_timeout_seconds = 300
try:
with anyio.fail_after(30):
with anyio.fail_after(sampling_timeout_seconds):
sampling_result = await ctx.session.create_message(
messages=[
SamplingMessage(
@@ -548,14 +550,14 @@ def configure_semantic_tools(mcp: FastMCP):
except TimeoutError:
logger.warning(
f"Sampling request timed out after 30 seconds for query: '{query}', "
f"Sampling request timed out after {sampling_timeout_seconds} seconds for query: '{query}', "
f"returning search results only"
)
return SamplingSearchResponse(
query=query,
generated_answer=(
f"[Sampling request timed out]\n\n"
f"The answer generation took too long (>30s). "
f"The answer generation took too long (>{sampling_timeout_seconds}s). "
f"Found {len(accessible_results)} relevant documents. "
f"Please review the sources below or try a simpler query."
),
@@ -675,15 +677,22 @@ def configure_semantic_tools(mcp: FastMCP):
# Get Qdrant client and query indexed count
indexed_count = 0
try:
from qdrant_client.models import Filter
from nextcloud_mcp_server.config import get_settings
from nextcloud_mcp_server.vector.placeholder import (
get_placeholder_filter,
)
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
settings = get_settings()
qdrant_client = await get_qdrant_client()
# Count documents in collection
# Count documents in collection, excluding placeholders
# Placeholders are zero-vector points used to track processing state
count_result = await qdrant_client.count(
collection_name=settings.get_collection_name()
collection_name=settings.get_collection_name(),
count_filter=Filter(must=[get_placeholder_filter()]),
)
indexed_count = count_result.count
+26
View File
@@ -0,0 +1,26 @@
"""Pytest configuration for integration tests.
This conftest.py provides hooks and fixtures specific to integration tests,
including the --provider flag for RAG tests.
"""
# Valid provider names
VALID_PROVIDERS = ["openai", "ollama", "anthropic", "bedrock"]


def pytest_addoption(parser):
    """Add --provider command line option for RAG tests.

    The option defaults to ``None`` when it is not passed on the command
    line, and only the names in ``VALID_PROVIDERS`` are accepted.

    Args:
        parser: Object exposing ``addoption`` (the pytest option parser).
    """
    parser.addoption(
        "--provider",
        action="store",
        default=None,
        choices=VALID_PROVIDERS,
        # Build the help text from the same list used for ``choices`` so the
        # two can never drift apart when a provider is added or removed.
        help=f"LLM provider for RAG tests: {', '.join(VALID_PROVIDERS)}",
    )
def pytest_configure(config):
    """Register the custom ``rag`` marker with pytest.

    Args:
        config: Object exposing ``addinivalue_line`` (the pytest config).
    """
    rag_marker = "rag: mark test as RAG integration test (requires --provider flag)"
    config.addinivalue_line("markers", rag_marker)
+264
View File
@@ -0,0 +1,264 @@
"""Provider fixtures for integration tests.
This module provides pytest fixtures that configure LLM providers based on
an explicit --provider flag. Supports OpenAI, Ollama, Anthropic, and Bedrock.
Usage:
pytest tests/integration/test_rag.py --provider=openai
pytest tests/integration/test_rag.py --provider=ollama
pytest tests/integration/test_rag.py --provider=anthropic
pytest tests/integration/test_rag.py --provider=bedrock
Environment Variables by Provider:
OpenAI:
OPENAI_API_KEY: API key (required)
OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small")
OPENAI_GENERATION_MODEL: Generation model (default: "gpt-4o-mini")
Ollama:
OLLAMA_BASE_URL: API URL (required, e.g., "http://localhost:11434")
OLLAMA_EMBEDDING_MODEL: Embedding model (default: "nomic-embed-text")
OLLAMA_GENERATION_MODEL: Generation model (default: "llama3.2:1b")
Anthropic:
ANTHROPIC_API_KEY: API key (required)
ANTHROPIC_GENERATION_MODEL: Model (default: "claude-3-haiku-20240307")
Bedrock:
AWS_REGION: AWS region (required)
BEDROCK_EMBEDDING_MODEL: Embedding model ID
BEDROCK_GENERATION_MODEL: Generation model ID
"""
import logging
import os
from typing import AsyncGenerator
import pytest
from nextcloud_mcp_server.providers.base import Provider
logger = logging.getLogger(__name__)
# Valid provider names (must match conftest.py)
VALID_PROVIDERS = ["openai", "ollama", "anthropic", "bedrock"]

# (environment variable, default model) for each capability and provider.
# A default of None means the environment variable is required.
# Anthropic has no "embedding" entry because it does not support embeddings.
_MODEL_ENV = {
    "generation": {
        "openai": ("OPENAI_GENERATION_MODEL", "gpt-4o-mini"),
        "ollama": ("OLLAMA_GENERATION_MODEL", "llama3.2:1b"),
        "anthropic": ("ANTHROPIC_GENERATION_MODEL", "claude-3-haiku-20240307"),
        "bedrock": ("BEDROCK_GENERATION_MODEL", None),
    },
    "embedding": {
        "openai": ("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"),
        "ollama": ("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
        "bedrock": ("BEDROCK_EMBEDDING_MODEL", None),
    },
}


def _require_env(name: str) -> str:
    """Return environment variable *name*, raising ValueError if unset or empty."""
    value = os.getenv(name)
    if not value:
        raise ValueError(f"{name} environment variable required")
    return value


def _resolve_model(provider_name: str, capability: str) -> str:
    """Return the model name for *provider_name*/*capability* from the environment."""
    env_name, default = _MODEL_ENV[capability][provider_name]
    if default is None:
        return _require_env(env_name)
    return os.getenv(env_name, default)


async def _create_provider(provider_name: str, capability: str) -> "Provider":
    """Build a provider with only the model for *capability* configured.

    Args:
        provider_name: One of ``VALID_PROVIDERS``.
        capability: Either "generation" or "embedding".

    Returns:
        Provider instance configured for the requested capability.

    Raises:
        ValueError: If provider_name is invalid or required env vars missing
    """
    if provider_name not in VALID_PROVIDERS:
        raise ValueError(f"Unknown provider: {provider_name}. Valid: {VALID_PROVIDERS}")

    # Only the slot for the requested capability is filled; the other stays
    # None so the provider is generation-only or embeddings-only.
    model_slots = {"embedding_model": None, "generation_model": None}
    slot = f"{capability}_model"

    # In every branch the environment is validated before the provider module
    # is imported, so configuration errors surface first.
    if provider_name == "openai":
        api_key = _require_env("OPENAI_API_KEY")
        base_url = os.getenv("OPENAI_BASE_URL")
        model = _resolve_model(provider_name, capability)
        # GitHub Models API requires model name prefix
        if (
            base_url
            and "models.github.ai" in base_url
            and not model.startswith("openai/")
        ):
            model = f"openai/{model}"
        model_slots[slot] = model

        from nextcloud_mcp_server.providers.openai import OpenAIProvider

        provider = OpenAIProvider(api_key=api_key, base_url=base_url, **model_slots)
        label = "OpenAI"

    elif provider_name == "ollama":
        base_url = _require_env("OLLAMA_BASE_URL")
        model = _resolve_model(provider_name, capability)
        model_slots[slot] = model

        from nextcloud_mcp_server.providers.ollama import OllamaProvider

        provider = OllamaProvider(base_url=base_url, **model_slots)
        label = "Ollama"

    elif provider_name == "anthropic":
        api_key = _require_env("ANTHROPIC_API_KEY")
        model = _resolve_model(provider_name, capability)

        from nextcloud_mcp_server.providers.anthropic import AnthropicProvider

        # AnthropicProvider is generation-only and takes no embedding_model.
        provider = AnthropicProvider(api_key=api_key, generation_model=model)
        label = "Anthropic"

    else:  # "bedrock" - the only remaining member of VALID_PROVIDERS
        region = _require_env("AWS_REGION")
        model = _resolve_model(provider_name, capability)
        model_slots[slot] = model

        from nextcloud_mcp_server.providers.bedrock import BedrockProvider

        provider = BedrockProvider(region=region, **model_slots)
        label = "Bedrock"

    logger.info(f"Created {label} {capability} provider: model={model}")
    return provider


async def create_generation_provider(provider_name: str) -> "Provider":
    """Create a provider configured for text generation.

    Args:
        provider_name: One of "openai", "ollama", "anthropic", "bedrock"

    Returns:
        Provider instance configured for generation

    Raises:
        ValueError: If provider_name is invalid or required env vars missing
    """
    return await _create_provider(provider_name, "generation")


async def create_embedding_provider(provider_name: str) -> "Provider":
    """Create a provider configured for embeddings.

    Args:
        provider_name: One of "openai", "ollama", "bedrock"
            (Anthropic does not support embeddings)

    Returns:
        Provider instance configured for embeddings

    Raises:
        ValueError: If provider_name is invalid, doesn't support embeddings,
            or required env vars missing
    """
    # Reject Anthropic up front with a specific message.
    if provider_name == "anthropic":
        raise ValueError("Anthropic does not support embeddings")
    return await _create_provider(provider_name, "embedding")
# =============================================================================
# Pytest Fixtures
# =============================================================================
@pytest.fixture(scope="module")
def provider_name(request) -> str:
    """Return the value of the --provider command line option.

    The requesting test is skipped when --provider was not given.
    """
    selected = request.config.getoption("--provider")
    if selected:
        return selected
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip("--provider flag required (openai, ollama, anthropic, bedrock)")
@pytest.fixture(scope="module")
async def generation_provider(provider_name: str) -> AsyncGenerator[Provider, None]:
    """Yield a generation-capable provider for the selected --provider.

    The provider is built by create_generation_provider() and closed once
    the fixture is torn down.
    """
    llm = await create_generation_provider(provider_name)
    yield llm
    # Teardown: release the provider's resources.
    await llm.close()
@pytest.fixture(scope="module")
async def embedding_provider(provider_name: str) -> AsyncGenerator[Provider, None]:
    """Yield an embedding-capable provider for the selected --provider.

    Requires --provider flag to be set.
    Note: Anthropic does not support embeddings - the requesting test is
    skipped when --provider=anthropic.
    """
    if provider_name == "anthropic":
        pytest.skip("Anthropic does not support embeddings")

    embedder = await create_embedding_provider(provider_name)
    yield embedder
    # Teardown: release the provider's resources.
    await embedder.close()
+49 -23
View File
@@ -1,7 +1,7 @@
"""MCP sampling support for integration tests.
This module provides utilities to enable real LLM-based sampling in integration tests
using OpenAI or GitHub Models API.
using any provider that supports text generation (OpenAI, Ollama, Anthropic, Bedrock).
"""
import logging
@@ -10,46 +10,58 @@ from typing import Any
from mcp import types
from mcp.client.session import ClientSession, RequestContext
from nextcloud_mcp_server.providers.openai import OpenAIProvider
from nextcloud_mcp_server.providers.base import Provider
logger = logging.getLogger(__name__)
def create_openai_sampling_callback(provider: OpenAIProvider):
"""Factory to create a sampling callback using OpenAI provider.
def create_sampling_callback(provider: Provider):
"""Factory to create a sampling callback using any generation-capable provider.
The callback conforms to MCP's SamplingFnT protocol and can be passed
to ClientSession for handling sampling requests from the server.
Args:
provider: OpenAIProvider instance configured with a generation model
provider: Any Provider instance that supports generation
(supports_generation=True)
Returns:
Async callback function for MCP sampling
Raises:
ValueError: If provider doesn't support generation
Example:
```python
provider = OpenAIProvider(
api_key=os.getenv("OPENAI_API_KEY"),
base_url=os.getenv("OPENAI_BASE_URL"),
generation_model="gpt-4o-mini",
)
callback = create_openai_sampling_callback(provider)
from nextcloud_mcp_server.providers import get_provider
async for session in create_mcp_client_session(
url="http://localhost:8000/mcp",
sampling_callback=callback,
):
# Session now supports sampling
pass
provider = get_provider() # Auto-detect from environment
if provider.supports_generation:
callback = create_sampling_callback(provider)
async for session in create_mcp_client_session(
url="http://localhost:8000/mcp",
sampling_callback=callback,
):
# Session now supports sampling
pass
```
"""
if not provider.supports_generation:
raise ValueError(
f"Provider {provider.__class__.__name__} does not support generation"
)
# Get model name for logging (provider-specific attribute)
model_name = (
getattr(provider, "generation_model", None) or provider.__class__.__name__
)
async def sampling_callback(
context: RequestContext[ClientSession, Any],
params: types.CreateMessageRequestParams,
) -> types.CreateMessageResult | types.ErrorData:
"""Handle sampling requests using OpenAI provider."""
"""Handle sampling requests using the configured provider."""
logger.debug(f"Sampling callback invoked with {len(params.messages)} messages")
# Extract messages and build prompt
@@ -68,14 +80,13 @@ def create_openai_sampling_callback(provider: OpenAIProvider):
logger.debug(f"Generating response for prompt ({len(prompt)} chars)")
try:
# Generate response using OpenAI provider
# Note: temperature is hardcoded in the provider at 0.7
# Generate response using provider
# Note: temperature is typically hardcoded in providers at 0.7
response = await provider.generate(
prompt=prompt,
max_tokens=params.maxTokens,
)
model_name = provider.generation_model or "unknown"
logger.info(f"Sampling completed: {len(response)} chars from {model_name}")
return types.CreateMessageResult(
@@ -85,10 +96,25 @@ def create_openai_sampling_callback(provider: OpenAIProvider):
stopReason="endTurn",
)
except Exception as e:
logger.error(f"OpenAI generation failed: {e}")
logger.error(f"Generation failed ({provider.__class__.__name__}): {e}")
return types.ErrorData(
code=types.INTERNAL_ERROR,
message=f"OpenAI generation failed: {e!s}",
message=f"Generation failed: {e!s}",
)
return sampling_callback
def create_openai_sampling_callback(provider: "Provider"):
    """Backward-compatible alias for create_sampling_callback().

    Kept so existing callers of the OpenAI-specific name keep working; the
    provider is passed straight through to create_sampling_callback().
    Prefer calling create_sampling_callback() directly in new code.

    Args:
        provider: Provider instance configured with a generation model

    Returns:
        Async callback function for MCP sampling
    """
    callback = create_sampling_callback(provider)
    return callback
@@ -1,26 +1,33 @@
"""Integration tests for RAG pipeline with OpenAI/GitHub Models API.
"""Integration tests for RAG pipeline with multiple LLM providers.
These tests validate the complete semantic search and MCP sampling flow using:
1. OpenAI embeddings for semantic search
2. MCP sampling for answer generation
1. MCP server's built-in semantic search (embeddings handled server-side)
2. MCP sampling for answer generation (any generation-capable provider)
3. Pre-indexed Nextcloud User Manual as the knowledge base
Environment Variables:
OPENAI_API_KEY: OpenAI API key or GitHub token for models.github.ai
OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small")
OPENAI_GENERATION_MODEL: Generation model for sampling (default: "gpt-4o-mini")
RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud_User_Manual.pdf")
Usage:
# Run with OpenAI (including GitHub Models API)
OPENAI_API_KEY=... pytest tests/integration/test_rag.py --provider=openai -v
For GitHub CI, set:
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
OPENAI_BASE_URL: https://models.github.ai/inference
OPENAI_EMBEDDING_MODEL: openai/text-embedding-3-small
OPENAI_GENERATION_MODEL: openai/gpt-4o-mini
# Run with Ollama
OLLAMA_BASE_URL=http://localhost:11434 OLLAMA_GENERATION_MODEL=llama3.2:1b \\
pytest tests/integration/test_rag.py --provider=ollama -v
# Run with Anthropic
ANTHROPIC_API_KEY=... pytest tests/integration/test_rag.py --provider=anthropic -v
# Run with AWS Bedrock
AWS_REGION=us-east-1 BEDROCK_GENERATION_MODEL=... \\
pytest tests/integration/test_rag.py --provider=bedrock -v
Environment Variables:
See tests/integration/provider_fixtures.py for provider-specific configuration.
RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud Manual.pdf")
Prerequisites:
- Nextcloud User Manual PDF uploaded to Nextcloud
- VECTOR_SYNC_ENABLED=true on the MCP server
- Provider-specific environment variables set
"""
import json
@@ -33,9 +40,10 @@ import anyio
import pytest
from mcp import ClientSession
from nextcloud_mcp_server.providers.openai import OpenAIProvider
from nextcloud_mcp_server.providers.base import Provider
from tests.conftest import create_mcp_client_session
from tests.integration.sampling_support import create_openai_sampling_callback
from tests.integration.provider_fixtures import create_generation_provider
from tests.integration.sampling_support import create_sampling_callback
logger = logging.getLogger(__name__)
@@ -44,14 +52,14 @@ DEFAULT_MANUAL_PATH = "Nextcloud Manual.pdf"
async def llm_judge(
provider: "OpenAIProvider",
provider: Provider,
ground_truth: str,
system_output: str,
) -> bool:
"""Use LLM to judge if system output aligns with ground truth.
Args:
provider: OpenAI provider with generation capability
provider: Any provider with generation capability
ground_truth: The expected/reference answer
system_output: The system's actual output to evaluate
@@ -66,17 +74,18 @@ Does the system output contain the key facts from the ground truth?
Answer: TRUE or FALSE"""
logger.info("Received ground truth: %s", ground_truth)
logger.info("Received system output: %s", system_output)
response = await provider.generate(prompt, max_tokens=10)
logger.info("LLM Judge response: %s", response)
return "TRUE" in response.upper()
# Skip all tests if OpenAI API key not configured
# Mark all tests as integration tests
pytestmark = [
pytest.mark.integration,
pytest.mark.skipif(
not os.getenv("OPENAI_API_KEY"),
reason="OPENAI_API_KEY not set - skipping OpenAI RAG tests",
),
pytest.mark.rag,
]
# Ground truth fixture path
@@ -175,78 +184,49 @@ async def indexed_manual_pdf(nc_client, nc_mcp_client):
@pytest.fixture(scope="module")
async def openai_provider():
"""OpenAI provider configured from environment (embeddings only)."""
api_key = os.getenv("OPENAI_API_KEY")
base_url = os.getenv("OPENAI_BASE_URL")
embedding_model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")
def provider_name(request) -> str:
"""Get the provider name from --provider flag.
provider = OpenAIProvider(
api_key=api_key,
base_url=base_url,
embedding_model=embedding_model,
generation_model=None, # Embeddings only
)
yield provider
await provider.close()
Raises pytest.skip if --provider not specified.
"""
name = request.config.getoption("--provider")
if not name:
pytest.skip("--provider flag required (openai, ollama, anthropic, bedrock)")
return name
@pytest.fixture(scope="module")
async def openai_generation_provider():
"""OpenAI provider configured for text generation (for sampling callback)."""
api_key = os.getenv("OPENAI_API_KEY")
base_url = os.getenv("OPENAI_BASE_URL")
generation_model = os.getenv("OPENAI_GENERATION_MODEL", "gpt-4o-mini")
# For GitHub Models API, use the prefixed model name
if base_url and "models.github.ai" in base_url:
if not generation_model.startswith("openai/"):
generation_model = f"openai/{generation_model}"
provider = OpenAIProvider(
api_key=api_key,
base_url=base_url,
embedding_model=None, # Generation only
generation_model=generation_model,
)
async def generation_provider(provider_name: str) -> AsyncGenerator[Provider, None]:
"""Provider configured for text generation.
Requires --provider flag to be set.
"""
provider = await create_generation_provider(provider_name)
yield provider
await provider.close()
@pytest.fixture(scope="module")
async def nc_mcp_client_with_sampling(
anyio_backend, openai_generation_provider
anyio_backend, generation_provider, provider_name
) -> AsyncGenerator[ClientSession, Any]:
"""MCP client with OpenAI-based sampling support.
"""MCP client with sampling support using the specified provider.
This fixture creates an MCP client that can handle sampling requests
from the server using OpenAI for text generation.
from the server using the configured generation provider.
"""
sampling_callback = create_openai_sampling_callback(openai_generation_provider)
sampling_callback = create_sampling_callback(generation_provider)
async for session in create_mcp_client_session(
url="http://localhost:8000/mcp",
client_name="OpenAI Sampling MCP",
client_name=f"Sampling MCP ({provider_name})",
sampling_callback=sampling_callback,
):
yield session
async def test_openai_embeddings_work(openai_provider: OpenAIProvider):
"""Test that OpenAI embeddings can be generated."""
embedding = await openai_provider.embed("test query about Nextcloud")
assert isinstance(embedding, list)
assert len(embedding) > 0
assert all(isinstance(x, float) for x in embedding)
# OpenAI embedding dimensions: 1536 (small) or 3072 (large)
assert len(embedding) in [1536, 3072]
async def test_semantic_search_retrieval(
nc_mcp_client, ground_truth_qa, indexed_manual_pdf, openai_generation_provider
nc_mcp_client, ground_truth_qa, indexed_manual_pdf, generation_provider
):
"""Test that semantic search retrieves relevant documents from the manual.
@@ -278,7 +258,7 @@ async def test_semantic_search_retrieval(
# Use LLM judge to evaluate if excerpts are relevant to ground truth
all_excerpts = " ".join([r["excerpt"] for r in data["results"]])
is_relevant = await llm_judge(
openai_generation_provider,
generation_provider,
test_case["ground_truth"],
all_excerpts,
)
@@ -289,16 +269,16 @@ async def test_semantic_search_answer_with_sampling(
nc_mcp_client_with_sampling,
ground_truth_qa,
indexed_manual_pdf,
openai_generation_provider,
generation_provider,
):
"""Test semantic search with MCP sampling for answer generation.
This tests the full RAG pipeline:
1. Semantic search retrieves relevant documents
2. MCP sampling generates an answer from the retrieved context
3. OpenAI generates the answer via the sampling callback
3. Provider generates the answer via the sampling callback
Uses nc_mcp_client_with_sampling which has OpenAI-based sampling enabled.
Uses nc_mcp_client_with_sampling which has sampling enabled.
"""
# Use the 2FA question - has clear expected answer
test_case = ground_truth_qa[0]
@@ -348,7 +328,7 @@ async def test_semantic_search_answer_with_sampling(
# Use LLM judge to evaluate answer relevance
is_relevant = await llm_judge(
openai_generation_provider,
generation_provider,
test_case["ground_truth"],
data["generated_answer"],
)