fix: Increase MCP sampling timeout to 5 minutes for slower LLMs
- Increase sampling timeout from 30s to 300s in semantic.py to accommodate slower local LLMs like Ollama
- Refactor RAG integration tests to support multiple providers (ollama, openai, anthropic, bedrock)
- Remove unnecessary embedding_provider fixture since MCP server handles embeddings internally
- Add --provider flag via tests/integration/conftest.py
- Add provider_fixtures.py with factory functions for generation providers

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -25,21 +25,6 @@ jobs:
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
|
||||
with:
|
||||
submodules: 'true'
|
||||
|
||||
###### Required to build OIDC App ######
|
||||
- name: Set up php 8.4
|
||||
uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
|
||||
with:
|
||||
php-version: 8.4
|
||||
coverage: none
|
||||
|
||||
- name: Install OIDC app composer dependencies
|
||||
run: |
|
||||
cd third_party/oidc
|
||||
composer install --no-dev
|
||||
###### Required to build OIDC App ######
|
||||
|
||||
- name: Run docker compose with vector sync
|
||||
uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
|
||||
@@ -101,7 +86,7 @@ jobs:
|
||||
OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
|
||||
OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
|
||||
run: |
|
||||
uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO
|
||||
uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO --provider openai
|
||||
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
|
||||
@@ -17,18 +17,20 @@ class AnthropicProvider(Provider):
|
||||
Note: Anthropic doesn't provide embedding models, only text generation.
|
||||
"""
|
||||
|
||||
def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"):
|
||||
def __init__(
|
||||
self, api_key: str, generation_model: str = "claude-3-5-sonnet-20241022"
|
||||
):
|
||||
"""
|
||||
Initialize Anthropic provider.
|
||||
|
||||
Args:
|
||||
api_key: Anthropic API key
|
||||
model: Model name (e.g., "claude-3-5-sonnet-20241022")
|
||||
generation_model: Model name (e.g., "claude-3-5-sonnet-20241022")
|
||||
"""
|
||||
self.client = AsyncAnthropic(api_key=api_key)
|
||||
self.model = model
|
||||
self.model = generation_model
|
||||
|
||||
logger.info(f"Initialized Anthropic provider (model={model})")
|
||||
logger.info(f"Initialized Anthropic provider (model={self.model})")
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
|
||||
@@ -499,9 +499,11 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
)
|
||||
|
||||
# 6. Request LLM completion via MCP sampling with timeout
|
||||
# Note: 5 minute timeout to accommodate slower local LLMs (e.g., Ollama)
|
||||
sampling_timeout_seconds = 300
|
||||
|
||||
try:
|
||||
with anyio.fail_after(30):
|
||||
with anyio.fail_after(sampling_timeout_seconds):
|
||||
sampling_result = await ctx.session.create_message(
|
||||
messages=[
|
||||
SamplingMessage(
|
||||
@@ -548,14 +550,14 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
|
||||
except TimeoutError:
|
||||
logger.warning(
|
||||
f"Sampling request timed out after 30 seconds for query: '{query}', "
|
||||
f"Sampling request timed out after {sampling_timeout_seconds} seconds for query: '{query}', "
|
||||
f"returning search results only"
|
||||
)
|
||||
return SamplingSearchResponse(
|
||||
query=query,
|
||||
generated_answer=(
|
||||
f"[Sampling request timed out]\n\n"
|
||||
f"The answer generation took too long (>30s). "
|
||||
f"The answer generation took too long (>{sampling_timeout_seconds}s). "
|
||||
f"Found {len(accessible_results)} relevant documents. "
|
||||
f"Please review the sources below or try a simpler query."
|
||||
),
|
||||
@@ -675,15 +677,22 @@ def configure_semantic_tools(mcp: FastMCP):
|
||||
# Get Qdrant client and query indexed count
|
||||
indexed_count = 0
|
||||
try:
|
||||
from qdrant_client.models import Filter
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.placeholder import (
|
||||
get_placeholder_filter,
|
||||
)
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
settings = get_settings()
|
||||
qdrant_client = await get_qdrant_client()
|
||||
|
||||
# Count documents in collection
|
||||
# Count documents in collection, excluding placeholders
|
||||
# Placeholders are zero-vector points used to track processing state
|
||||
count_result = await qdrant_client.count(
|
||||
collection_name=settings.get_collection_name()
|
||||
collection_name=settings.get_collection_name(),
|
||||
count_filter=Filter(must=[get_placeholder_filter()]),
|
||||
)
|
||||
indexed_count = count_result.count
|
||||
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
"""Pytest configuration for integration tests.
|
||||
|
||||
This conftest.py provides hooks and fixtures specific to integration tests,
|
||||
including the --provider flag for RAG tests.
|
||||
"""
|
||||
|
||||
# Valid provider names
VALID_PROVIDERS = ["openai", "ollama", "anthropic", "bedrock"]


def pytest_addoption(parser):
    """Register the ``--provider`` command line option used by the RAG tests.

    The option is optional (default ``None``) and restricted to the names in
    ``VALID_PROVIDERS``.

    Args:
        parser: The pytest option parser passed to the ``pytest_addoption`` hook.
    """
    parser.addoption(
        "--provider",
        action="store",
        default=None,
        choices=VALID_PROVIDERS,
        # Built from VALID_PROVIDERS so the help text cannot drift from the
        # accepted choices.
        help=f"LLM provider for RAG tests: {', '.join(VALID_PROVIDERS)}",
    )
|
||||
|
||||
|
||||
def pytest_configure(config):
    """Register the custom ``rag`` marker.

    Args:
        config: The pytest config object passed to the ``pytest_configure`` hook.
    """
    config.addinivalue_line(
        "markers", "rag: mark test as RAG integration test (requires --provider flag)"
    )
|
||||
@@ -0,0 +1,264 @@
|
||||
"""Provider fixtures for integration tests.
|
||||
|
||||
This module provides pytest fixtures that configure LLM providers based on
|
||||
an explicit --provider flag. Supports OpenAI, Ollama, Anthropic, and Bedrock.
|
||||
|
||||
Usage:
|
||||
pytest tests/integration/test_rag.py --provider=openai
|
||||
pytest tests/integration/test_rag.py --provider=ollama
|
||||
pytest tests/integration/test_rag.py --provider=anthropic
|
||||
pytest tests/integration/test_rag.py --provider=bedrock
|
||||
|
||||
Environment Variables by Provider:
|
||||
|
||||
OpenAI:
|
||||
OPENAI_API_KEY: API key (required)
|
||||
OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
|
||||
OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small")
|
||||
OPENAI_GENERATION_MODEL: Generation model (default: "gpt-4o-mini")
|
||||
|
||||
Ollama:
|
||||
OLLAMA_BASE_URL: API URL (required, e.g., "http://localhost:11434")
|
||||
OLLAMA_EMBEDDING_MODEL: Embedding model (default: "nomic-embed-text")
|
||||
OLLAMA_GENERATION_MODEL: Generation model (default: "llama3.2:1b")
|
||||
|
||||
Anthropic:
|
||||
ANTHROPIC_API_KEY: API key (required)
|
||||
ANTHROPIC_GENERATION_MODEL: Model (default: "claude-3-haiku-20240307")
|
||||
|
||||
Bedrock:
|
||||
AWS_REGION: AWS region (required)
|
||||
BEDROCK_EMBEDDING_MODEL: Embedding model ID
|
||||
BEDROCK_GENERATION_MODEL: Generation model ID
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import AsyncGenerator
|
||||
|
||||
import pytest
|
||||
|
||||
from nextcloud_mcp_server.providers.base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Valid provider names (must match conftest.py)
|
||||
VALID_PROVIDERS = ["openai", "ollama", "anthropic", "bedrock"]
|
||||
|
||||
|
||||
async def create_generation_provider(provider_name: str) -> Provider:
    """Create a provider configured for text generation.

    Required environment variables are validated before the provider module
    is imported, so a missing variable is reported as ``ValueError`` even if
    the provider's module cannot be imported. Optional model variables that
    are unset or empty fall back to their defaults (e.g. a CI workflow input
    that was left blank arrives as an empty string).

    Args:
        provider_name: One of "openai", "ollama", "anthropic", "bedrock"

    Returns:
        Provider instance configured for generation

    Raises:
        ValueError: If provider_name is invalid or required env vars missing
    """
    if provider_name == "openai":
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable required")

        # An empty override is treated the same as "not set".
        base_url = os.getenv("OPENAI_BASE_URL") or None
        generation_model = os.getenv("OPENAI_GENERATION_MODEL") or "gpt-4o-mini"

        # GitHub Models API requires model name prefix
        if (
            base_url
            and "models.github.ai" in base_url
            and not generation_model.startswith("openai/")
        ):
            generation_model = f"openai/{generation_model}"

        from nextcloud_mcp_server.providers.openai import OpenAIProvider

        provider = OpenAIProvider(
            api_key=api_key,
            base_url=base_url,
            embedding_model=None,  # Generation only
            generation_model=generation_model,
        )
        logger.info(f"Created OpenAI generation provider: model={generation_model}")
        return provider

    elif provider_name == "ollama":
        base_url = os.getenv("OLLAMA_BASE_URL")
        if not base_url:
            raise ValueError("OLLAMA_BASE_URL environment variable required")

        generation_model = os.getenv("OLLAMA_GENERATION_MODEL") or "llama3.2:1b"

        from nextcloud_mcp_server.providers.ollama import OllamaProvider

        provider = OllamaProvider(
            base_url=base_url,
            embedding_model=None,  # Generation only
            generation_model=generation_model,
        )
        logger.info(f"Created Ollama generation provider: model={generation_model}")
        return provider

    elif provider_name == "anthropic":
        api_key = os.getenv("ANTHROPIC_API_KEY")
        if not api_key:
            raise ValueError("ANTHROPIC_API_KEY environment variable required")

        generation_model = (
            os.getenv("ANTHROPIC_GENERATION_MODEL") or "claude-3-haiku-20240307"
        )

        from nextcloud_mcp_server.providers.anthropic import AnthropicProvider

        provider = AnthropicProvider(
            api_key=api_key,
            generation_model=generation_model,
        )
        logger.info(f"Created Anthropic generation provider: model={generation_model}")
        return provider

    elif provider_name == "bedrock":
        region = os.getenv("AWS_REGION")
        if not region:
            raise ValueError("AWS_REGION environment variable required")

        # Bedrock has no default model; the ID must be supplied explicitly.
        generation_model = os.getenv("BEDROCK_GENERATION_MODEL")
        if not generation_model:
            raise ValueError("BEDROCK_GENERATION_MODEL environment variable required")

        from nextcloud_mcp_server.providers.bedrock import BedrockProvider

        provider = BedrockProvider(
            region=region,
            embedding_model=None,  # Generation only
            generation_model=generation_model,
        )
        logger.info(f"Created Bedrock generation provider: model={generation_model}")
        return provider

    else:
        raise ValueError(f"Unknown provider: {provider_name}. Valid: {VALID_PROVIDERS}")
|
||||
|
||||
|
||||
async def create_embedding_provider(provider_name: str) -> Provider:
    """Create a provider configured for embeddings.

    Required environment variables are validated before the provider module
    is imported, so a missing variable is reported as ``ValueError`` even if
    the provider's module cannot be imported. Optional model variables that
    are unset or empty fall back to their defaults (e.g. a CI workflow input
    that was left blank arrives as an empty string).

    Args:
        provider_name: One of "openai", "ollama", "bedrock"
            (Anthropic does not support embeddings)

    Returns:
        Provider instance configured for embeddings

    Raises:
        ValueError: If provider_name is invalid, doesn't support embeddings,
            or required env vars missing
    """
    if provider_name == "anthropic":
        raise ValueError("Anthropic does not support embeddings")

    if provider_name == "openai":
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY environment variable required")

        # An empty override is treated the same as "not set".
        base_url = os.getenv("OPENAI_BASE_URL") or None
        embedding_model = (
            os.getenv("OPENAI_EMBEDDING_MODEL") or "text-embedding-3-small"
        )

        # GitHub Models API requires model name prefix
        if (
            base_url
            and "models.github.ai" in base_url
            and not embedding_model.startswith("openai/")
        ):
            embedding_model = f"openai/{embedding_model}"

        from nextcloud_mcp_server.providers.openai import OpenAIProvider

        provider = OpenAIProvider(
            api_key=api_key,
            base_url=base_url,
            embedding_model=embedding_model,
            generation_model=None,  # Embeddings only
        )
        logger.info(f"Created OpenAI embedding provider: model={embedding_model}")
        return provider

    elif provider_name == "ollama":
        base_url = os.getenv("OLLAMA_BASE_URL")
        if not base_url:
            raise ValueError("OLLAMA_BASE_URL environment variable required")

        embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL") or "nomic-embed-text"

        from nextcloud_mcp_server.providers.ollama import OllamaProvider

        provider = OllamaProvider(
            base_url=base_url,
            embedding_model=embedding_model,
            generation_model=None,  # Embeddings only
        )
        logger.info(f"Created Ollama embedding provider: model={embedding_model}")
        return provider

    elif provider_name == "bedrock":
        region = os.getenv("AWS_REGION")
        if not region:
            raise ValueError("AWS_REGION environment variable required")

        # Bedrock has no default model; the ID must be supplied explicitly.
        embedding_model = os.getenv("BEDROCK_EMBEDDING_MODEL")
        if not embedding_model:
            raise ValueError("BEDROCK_EMBEDDING_MODEL environment variable required")

        from nextcloud_mcp_server.providers.bedrock import BedrockProvider

        provider = BedrockProvider(
            region=region,
            embedding_model=embedding_model,
            generation_model=None,  # Embeddings only
        )
        logger.info(f"Created Bedrock embedding provider: model={embedding_model}")
        return provider

    else:
        raise ValueError(f"Unknown provider: {provider_name}. Valid: {VALID_PROVIDERS}")
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pytest Fixtures
|
||||
# =============================================================================
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def provider_name(request) -> str:
    """Get the provider name from the --provider flag.

    Skips the requesting test (via ``pytest.skip``) when --provider was not
    specified.

    Args:
        request: The pytest fixture request, used to reach the config object.

    Returns:
        The provider name passed on the command line.
    """
    # default=None keeps this a skip rather than a ValueError when the option
    # was never registered (e.g. the conftest that adds it was not loaded).
    name = request.config.getoption("--provider", default=None)
    if not name:
        pytest.skip("--provider flag required (openai, ollama, anthropic, bedrock)")
    return name
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
async def generation_provider(provider_name: str) -> AsyncGenerator[Provider, None]:
    """Fixture providing a generation-capable provider.

    Requires --provider flag to be set. The provider is built with
    ``create_generation_provider`` and closed once the fixture is torn down.

    Args:
        provider_name: Provider name supplied by the ``provider_name`` fixture.

    Yields:
        The provider returned by ``create_generation_provider``.
    """
    provider = await create_generation_provider(provider_name)
    yield provider
    # Teardown: release the provider once the fixture goes out of scope.
    await provider.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
async def embedding_provider(provider_name: str) -> AsyncGenerator[Provider, None]:
    """Fixture providing an embedding-capable provider.

    Requires --provider flag to be set.
    Note: Anthropic does not support embeddings - tests using this fixture
    are skipped when --provider=anthropic.

    Args:
        provider_name: Provider name supplied by the ``provider_name`` fixture.

    Yields:
        The provider returned by ``create_embedding_provider``.
    """
    # Skip up front so the factory's ValueError for Anthropic is not reported
    # as a fixture error.
    if provider_name == "anthropic":
        pytest.skip("Anthropic does not support embeddings")

    provider = await create_embedding_provider(provider_name)
    yield provider
    # Teardown: release the provider once the fixture goes out of scope.
    await provider.close()
|
||||
@@ -1,7 +1,7 @@
|
||||
"""MCP sampling support for integration tests.
|
||||
|
||||
This module provides utilities to enable real LLM-based sampling in integration tests
|
||||
using OpenAI or GitHub Models API.
|
||||
using any provider that supports text generation (OpenAI, Ollama, Anthropic, Bedrock).
|
||||
"""
|
||||
|
||||
import logging
|
||||
@@ -10,46 +10,58 @@ from typing import Any
|
||||
from mcp import types
|
||||
from mcp.client.session import ClientSession, RequestContext
|
||||
|
||||
from nextcloud_mcp_server.providers.openai import OpenAIProvider
|
||||
from nextcloud_mcp_server.providers.base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def create_openai_sampling_callback(provider: OpenAIProvider):
|
||||
"""Factory to create a sampling callback using OpenAI provider.
|
||||
def create_sampling_callback(provider: Provider):
|
||||
"""Factory to create a sampling callback using any generation-capable provider.
|
||||
|
||||
The callback conforms to MCP's SamplingFnT protocol and can be passed
|
||||
to ClientSession for handling sampling requests from the server.
|
||||
|
||||
Args:
|
||||
provider: OpenAIProvider instance configured with a generation model
|
||||
provider: Any Provider instance that supports generation
|
||||
(supports_generation=True)
|
||||
|
||||
Returns:
|
||||
Async callback function for MCP sampling
|
||||
|
||||
Raises:
|
||||
ValueError: If provider doesn't support generation
|
||||
|
||||
Example:
|
||||
```python
|
||||
provider = OpenAIProvider(
|
||||
api_key=os.getenv("OPENAI_API_KEY"),
|
||||
base_url=os.getenv("OPENAI_BASE_URL"),
|
||||
generation_model="gpt-4o-mini",
|
||||
)
|
||||
callback = create_openai_sampling_callback(provider)
|
||||
from nextcloud_mcp_server.providers import get_provider
|
||||
|
||||
async for session in create_mcp_client_session(
|
||||
url="http://localhost:8000/mcp",
|
||||
sampling_callback=callback,
|
||||
):
|
||||
# Session now supports sampling
|
||||
pass
|
||||
provider = get_provider() # Auto-detect from environment
|
||||
if provider.supports_generation:
|
||||
callback = create_sampling_callback(provider)
|
||||
|
||||
async for session in create_mcp_client_session(
|
||||
url="http://localhost:8000/mcp",
|
||||
sampling_callback=callback,
|
||||
):
|
||||
# Session now supports sampling
|
||||
pass
|
||||
```
|
||||
"""
|
||||
if not provider.supports_generation:
|
||||
raise ValueError(
|
||||
f"Provider {provider.__class__.__name__} does not support generation"
|
||||
)
|
||||
|
||||
# Get model name for logging (provider-specific attribute)
|
||||
model_name = (
|
||||
getattr(provider, "generation_model", None) or provider.__class__.__name__
|
||||
)
|
||||
|
||||
async def sampling_callback(
|
||||
context: RequestContext[ClientSession, Any],
|
||||
params: types.CreateMessageRequestParams,
|
||||
) -> types.CreateMessageResult | types.ErrorData:
|
||||
"""Handle sampling requests using OpenAI provider."""
|
||||
"""Handle sampling requests using the configured provider."""
|
||||
logger.debug(f"Sampling callback invoked with {len(params.messages)} messages")
|
||||
|
||||
# Extract messages and build prompt
|
||||
@@ -68,14 +80,13 @@ def create_openai_sampling_callback(provider: OpenAIProvider):
|
||||
logger.debug(f"Generating response for prompt ({len(prompt)} chars)")
|
||||
|
||||
try:
|
||||
# Generate response using OpenAI provider
|
||||
# Note: temperature is hardcoded in the provider at 0.7
|
||||
# Generate response using provider
|
||||
# Note: temperature is typically hardcoded in providers at 0.7
|
||||
response = await provider.generate(
|
||||
prompt=prompt,
|
||||
max_tokens=params.maxTokens,
|
||||
)
|
||||
|
||||
model_name = provider.generation_model or "unknown"
|
||||
logger.info(f"Sampling completed: {len(response)} chars from {model_name}")
|
||||
|
||||
return types.CreateMessageResult(
|
||||
@@ -85,10 +96,25 @@ def create_openai_sampling_callback(provider: OpenAIProvider):
|
||||
stopReason="endTurn",
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"OpenAI generation failed: {e}")
|
||||
logger.error(f"Generation failed ({provider.__class__.__name__}): {e}")
|
||||
return types.ErrorData(
|
||||
code=types.INTERNAL_ERROR,
|
||||
message=f"OpenAI generation failed: {e!s}",
|
||||
message=f"Generation failed: {e!s}",
|
||||
)
|
||||
|
||||
return sampling_callback
|
||||
|
||||
|
||||
def create_openai_sampling_callback(provider: "Provider"):
    """Factory to create a sampling callback using OpenAI provider.

    This is a backward-compatible wrapper around create_sampling_callback().
    Prefer using create_sampling_callback() directly for new code.

    Args:
        provider: OpenAIProvider instance configured with a generation model

    Returns:
        Async callback function for MCP sampling
    """
    # Pure delegation: the provider is handed to the generic factory unchanged.
    return create_sampling_callback(provider)
|
||||
|
||||
@@ -1,26 +1,33 @@
|
||||
"""Integration tests for RAG pipeline with OpenAI/GitHub Models API.
|
||||
"""Integration tests for RAG pipeline with multiple LLM providers.
|
||||
|
||||
These tests validate the complete semantic search and MCP sampling flow using:
|
||||
1. OpenAI embeddings for semantic search
|
||||
2. MCP sampling for answer generation
|
||||
1. MCP server's built-in semantic search (embeddings handled server-side)
|
||||
2. MCP sampling for answer generation (any generation-capable provider)
|
||||
3. Pre-indexed Nextcloud User Manual as the knowledge base
|
||||
|
||||
Environment Variables:
|
||||
OPENAI_API_KEY: OpenAI API key or GitHub token for models.github.ai
|
||||
OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
|
||||
OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small")
|
||||
OPENAI_GENERATION_MODEL: Generation model for sampling (default: "gpt-4o-mini")
|
||||
RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud_User_Manual.pdf")
|
||||
Usage:
|
||||
# Run with OpenAI (including GitHub Models API)
|
||||
OPENAI_API_KEY=... pytest tests/integration/test_rag.py --provider=openai -v
|
||||
|
||||
For GitHub CI, set:
|
||||
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
|
||||
OPENAI_BASE_URL: https://models.github.ai/inference
|
||||
OPENAI_EMBEDDING_MODEL: openai/text-embedding-3-small
|
||||
OPENAI_GENERATION_MODEL: openai/gpt-4o-mini
|
||||
# Run with Ollama
|
||||
OLLAMA_BASE_URL=http://localhost:11434 OLLAMA_GENERATION_MODEL=llama3.2:1b \\
|
||||
pytest tests/integration/test_rag.py --provider=ollama -v
|
||||
|
||||
# Run with Anthropic
|
||||
ANTHROPIC_API_KEY=... pytest tests/integration/test_rag.py --provider=anthropic -v
|
||||
|
||||
# Run with AWS Bedrock
|
||||
AWS_REGION=us-east-1 BEDROCK_GENERATION_MODEL=... \\
|
||||
pytest tests/integration/test_rag.py --provider=bedrock -v
|
||||
|
||||
Environment Variables:
|
||||
See tests/integration/provider_fixtures.py for provider-specific configuration.
|
||||
RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud Manual.pdf")
|
||||
|
||||
Prerequisites:
|
||||
- Nextcloud User Manual PDF uploaded to Nextcloud
|
||||
- VECTOR_SYNC_ENABLED=true on the MCP server
|
||||
- Provider-specific environment variables set
|
||||
"""
|
||||
|
||||
import json
|
||||
@@ -33,9 +40,10 @@ import anyio
|
||||
import pytest
|
||||
from mcp import ClientSession
|
||||
|
||||
from nextcloud_mcp_server.providers.openai import OpenAIProvider
|
||||
from nextcloud_mcp_server.providers.base import Provider
|
||||
from tests.conftest import create_mcp_client_session
|
||||
from tests.integration.sampling_support import create_openai_sampling_callback
|
||||
from tests.integration.provider_fixtures import create_generation_provider
|
||||
from tests.integration.sampling_support import create_sampling_callback
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -44,14 +52,14 @@ DEFAULT_MANUAL_PATH = "Nextcloud Manual.pdf"
|
||||
|
||||
|
||||
async def llm_judge(
|
||||
provider: "OpenAIProvider",
|
||||
provider: Provider,
|
||||
ground_truth: str,
|
||||
system_output: str,
|
||||
) -> bool:
|
||||
"""Use LLM to judge if system output aligns with ground truth.
|
||||
|
||||
Args:
|
||||
provider: OpenAI provider with generation capability
|
||||
provider: Any provider with generation capability
|
||||
ground_truth: The expected/reference answer
|
||||
system_output: The system's actual output to evaluate
|
||||
|
||||
@@ -66,17 +74,18 @@ Does the system output contain the key facts from the ground truth?
|
||||
|
||||
Answer: TRUE or FALSE"""
|
||||
|
||||
logger.info("Received ground truth: %s", ground_truth)
|
||||
logger.info("Received system output: %s", system_output)
|
||||
|
||||
response = await provider.generate(prompt, max_tokens=10)
|
||||
logger.info("LLM Judge response: %s", response)
|
||||
return "TRUE" in response.upper()
|
||||
|
||||
|
||||
# Skip all tests if OpenAI API key not configured
|
||||
# Mark all tests as integration tests
|
||||
pytestmark = [
|
||||
pytest.mark.integration,
|
||||
pytest.mark.skipif(
|
||||
not os.getenv("OPENAI_API_KEY"),
|
||||
reason="OPENAI_API_KEY not set - skipping OpenAI RAG tests",
|
||||
),
|
||||
pytest.mark.rag,
|
||||
]
|
||||
|
||||
# Ground truth fixture path
|
||||
@@ -175,78 +184,49 @@ async def indexed_manual_pdf(nc_client, nc_mcp_client):
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
async def openai_provider():
|
||||
"""OpenAI provider configured from environment (embeddings only)."""
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
base_url = os.getenv("OPENAI_BASE_URL")
|
||||
embedding_model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")
|
||||
def provider_name(request) -> str:
|
||||
"""Get the provider name from --provider flag.
|
||||
|
||||
provider = OpenAIProvider(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
embedding_model=embedding_model,
|
||||
generation_model=None, # Embeddings only
|
||||
)
|
||||
|
||||
yield provider
|
||||
await provider.close()
|
||||
Raises pytest.skip if --provider not specified.
|
||||
"""
|
||||
name = request.config.getoption("--provider")
|
||||
if not name:
|
||||
pytest.skip("--provider flag required (openai, ollama, anthropic, bedrock)")
|
||||
return name
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
async def openai_generation_provider():
|
||||
"""OpenAI provider configured for text generation (for sampling callback)."""
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
base_url = os.getenv("OPENAI_BASE_URL")
|
||||
generation_model = os.getenv("OPENAI_GENERATION_MODEL", "gpt-4o-mini")
|
||||
|
||||
# For GitHub Models API, use the prefixed model name
|
||||
if base_url and "models.github.ai" in base_url:
|
||||
if not generation_model.startswith("openai/"):
|
||||
generation_model = f"openai/{generation_model}"
|
||||
|
||||
provider = OpenAIProvider(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
embedding_model=None, # Generation only
|
||||
generation_model=generation_model,
|
||||
)
|
||||
async def generation_provider(provider_name: str) -> AsyncGenerator[Provider, None]:
|
||||
"""Provider configured for text generation.
|
||||
|
||||
Requires --provider flag to be set.
|
||||
"""
|
||||
provider = await create_generation_provider(provider_name)
|
||||
yield provider
|
||||
await provider.close()
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
async def nc_mcp_client_with_sampling(
|
||||
anyio_backend, openai_generation_provider
|
||||
anyio_backend, generation_provider, provider_name
|
||||
) -> AsyncGenerator[ClientSession, Any]:
|
||||
"""MCP client with OpenAI-based sampling support.
|
||||
"""MCP client with sampling support using the specified provider.
|
||||
|
||||
This fixture creates an MCP client that can handle sampling requests
|
||||
from the server using OpenAI for text generation.
|
||||
from the server using the configured generation provider.
|
||||
"""
|
||||
sampling_callback = create_openai_sampling_callback(openai_generation_provider)
|
||||
sampling_callback = create_sampling_callback(generation_provider)
|
||||
|
||||
async for session in create_mcp_client_session(
|
||||
url="http://localhost:8000/mcp",
|
||||
client_name="OpenAI Sampling MCP",
|
||||
client_name=f"Sampling MCP ({provider_name})",
|
||||
sampling_callback=sampling_callback,
|
||||
):
|
||||
yield session
|
||||
|
||||
|
||||
async def test_openai_embeddings_work(openai_provider: OpenAIProvider):
|
||||
"""Test that OpenAI embeddings can be generated."""
|
||||
embedding = await openai_provider.embed("test query about Nextcloud")
|
||||
|
||||
assert isinstance(embedding, list)
|
||||
assert len(embedding) > 0
|
||||
assert all(isinstance(x, float) for x in embedding)
|
||||
# OpenAI embedding dimensions: 1536 (small) or 3072 (large)
|
||||
assert len(embedding) in [1536, 3072]
|
||||
|
||||
|
||||
async def test_semantic_search_retrieval(
|
||||
nc_mcp_client, ground_truth_qa, indexed_manual_pdf, openai_generation_provider
|
||||
nc_mcp_client, ground_truth_qa, indexed_manual_pdf, generation_provider
|
||||
):
|
||||
"""Test that semantic search retrieves relevant documents from the manual.
|
||||
|
||||
@@ -278,7 +258,7 @@ async def test_semantic_search_retrieval(
|
||||
# Use LLM judge to evaluate if excerpts are relevant to ground truth
|
||||
all_excerpts = " ".join([r["excerpt"] for r in data["results"]])
|
||||
is_relevant = await llm_judge(
|
||||
openai_generation_provider,
|
||||
generation_provider,
|
||||
test_case["ground_truth"],
|
||||
all_excerpts,
|
||||
)
|
||||
@@ -289,16 +269,16 @@ async def test_semantic_search_answer_with_sampling(
|
||||
nc_mcp_client_with_sampling,
|
||||
ground_truth_qa,
|
||||
indexed_manual_pdf,
|
||||
openai_generation_provider,
|
||||
generation_provider,
|
||||
):
|
||||
"""Test semantic search with MCP sampling for answer generation.
|
||||
|
||||
This tests the full RAG pipeline:
|
||||
1. Semantic search retrieves relevant documents
|
||||
2. MCP sampling generates an answer from the retrieved context
|
||||
3. OpenAI generates the answer via the sampling callback
|
||||
3. Provider generates the answer via the sampling callback
|
||||
|
||||
Uses nc_mcp_client_with_sampling which has OpenAI-based sampling enabled.
|
||||
Uses nc_mcp_client_with_sampling which has sampling enabled.
|
||||
"""
|
||||
# Use the 2FA question - has clear expected answer
|
||||
test_case = ground_truth_qa[0]
|
||||
@@ -348,7 +328,7 @@ async def test_semantic_search_answer_with_sampling(
|
||||
|
||||
# Use LLM judge to evaluate answer relevance
|
||||
is_relevant = await llm_judge(
|
||||
openai_generation_provider,
|
||||
generation_provider,
|
||||
test_case["ground_truth"],
|
||||
data["generated_answer"],
|
||||
)
|
||||
Reference in New Issue
Block a user