Files
nextcloud-mcp-server/tests/unit/providers/test_openai.py
Chris Coutinho 208365cd3d feat: Add OpenAI provider support for embeddings and generation
Adds OpenAI provider to the unified provider architecture (ADR-015),
supporting:
- OpenAI API (api.openai.com)
- GitHub Models API (models.github.ai/inference)
- OpenAI-compatible endpoints (Fireworks, Together, etc.)

Features:
- Embedding support with text-embedding-3-small/large models
- Text generation via chat completions API
- Automatic retry with exponential backoff for rate limits
- Provider auto-detection in registry (priority after Bedrock)

Environment variables:
- OPENAI_API_KEY: API key (required)
- OPENAI_BASE_URL: Base URL override (optional)
- OPENAI_EMBEDDING_MODEL: Embedding model (default: text-embedding-3-small)
- OPENAI_GENERATION_MODEL: Generation model (default: gpt-4o-mini)

Also adds:
- Integration tests for RAG pipeline with MCP sampling
- MCP client sampling support for integration tests
- Ground truth Q&A pairs for Nextcloud User Manual

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-23 00:33:32 +01:00

293 lines
8.5 KiB
Python

"""Unit tests for OpenAI provider."""
from unittest.mock import AsyncMock, MagicMock
import pytest
from nextcloud_mcp_server.providers.openai import (
OPENAI_EMBEDDING_DIMENSIONS,
OpenAIProvider,
)
@pytest.fixture
def mock_openai_client(mocker):
"""Mock OpenAI AsyncClient."""
mock_client = MagicMock()
mock_client.embeddings = MagicMock()
mock_client.chat = MagicMock()
mock_client.chat.completions = MagicMock()
mock_client.close = AsyncMock()
mocker.patch(
"nextcloud_mcp_server.providers.openai.AsyncOpenAI", return_value=mock_client
)
return mock_client
@pytest.mark.unit
async def test_openai_embedding(mock_openai_client):
"""Test OpenAI embedding with text-embedding-3-small."""
# Mock response
mock_embedding_data = MagicMock()
mock_embedding_data.embedding = [0.1, 0.2, 0.3]
mock_embedding_data.index = 0
mock_response = MagicMock()
mock_response.data = [mock_embedding_data]
mock_openai_client.embeddings.create = AsyncMock(return_value=mock_response)
# Create provider
provider = OpenAIProvider(
api_key="test-key",
embedding_model="text-embedding-3-small",
generation_model=None,
)
# Test embedding
embedding = await provider.embed("test text")
assert embedding == [0.1, 0.2, 0.3]
mock_openai_client.embeddings.create.assert_called_once_with(
input="test text",
model="text-embedding-3-small",
)
@pytest.mark.unit
async def test_openai_embedding_batch(mock_openai_client):
"""Test OpenAI batch embedding."""
# Mock response
mock_embedding_data_1 = MagicMock()
mock_embedding_data_1.embedding = [0.1, 0.2, 0.3]
mock_embedding_data_1.index = 0
mock_embedding_data_2 = MagicMock()
mock_embedding_data_2.embedding = [0.4, 0.5, 0.6]
mock_embedding_data_2.index = 1
mock_response = MagicMock()
mock_response.data = [mock_embedding_data_1, mock_embedding_data_2]
mock_openai_client.embeddings.create = AsyncMock(return_value=mock_response)
# Create provider
provider = OpenAIProvider(
api_key="test-key",
embedding_model="text-embedding-3-small",
generation_model=None,
)
# Test batch embedding
embeddings = await provider.embed_batch(["text1", "text2"])
assert len(embeddings) == 2
assert embeddings[0] == [0.1, 0.2, 0.3]
assert embeddings[1] == [0.4, 0.5, 0.6]
mock_openai_client.embeddings.create.assert_called_once_with(
input=["text1", "text2"],
model="text-embedding-3-small",
)
@pytest.mark.unit
async def test_openai_generation(mock_openai_client):
"""Test OpenAI text generation."""
# Mock response
mock_choice = MagicMock()
mock_choice.message.content = "Generated response"
mock_response = MagicMock()
mock_response.choices = [mock_choice]
mock_openai_client.chat.completions.create = AsyncMock(return_value=mock_response)
# Create provider
provider = OpenAIProvider(
api_key="test-key",
embedding_model=None,
generation_model="gpt-4o-mini",
)
# Test generation
text = await provider.generate("test prompt", max_tokens=100)
assert text == "Generated response"
mock_openai_client.chat.completions.create.assert_called_once_with(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "test prompt"}],
max_tokens=100,
temperature=0.7,
)
@pytest.mark.unit
async def test_openai_both_capabilities(mock_openai_client):
"""Test OpenAI with both embedding and generation models."""
# Mock embedding response
mock_embedding_data = MagicMock()
mock_embedding_data.embedding = [0.1, 0.2]
mock_embedding_data.index = 0
mock_embed_response = MagicMock()
mock_embed_response.data = [mock_embedding_data]
mock_openai_client.embeddings.create = AsyncMock(return_value=mock_embed_response)
# Mock generation response
mock_choice = MagicMock()
mock_choice.message.content = "Response"
mock_gen_response = MagicMock()
mock_gen_response.choices = [mock_choice]
mock_openai_client.chat.completions.create = AsyncMock(
return_value=mock_gen_response
)
# Create provider with both models
provider = OpenAIProvider(
api_key="test-key",
embedding_model="text-embedding-3-small",
generation_model="gpt-4o-mini",
)
assert provider.supports_embeddings is True
assert provider.supports_generation is True
# Test both capabilities
embedding = await provider.embed("test")
assert embedding == [0.1, 0.2]
text = await provider.generate("test")
assert text == "Response"
@pytest.mark.unit
async def test_openai_no_embeddings():
"""Test OpenAI provider with no embedding model raises error."""
provider = OpenAIProvider(
api_key="test-key",
embedding_model=None,
generation_model="gpt-4o-mini",
)
assert provider.supports_embeddings is False
with pytest.raises(NotImplementedError, match="no embedding_model configured"):
await provider.embed("test")
with pytest.raises(NotImplementedError, match="no embedding_model configured"):
await provider.embed_batch(["test"])
with pytest.raises(NotImplementedError, match="no embedding_model configured"):
provider.get_dimension()
@pytest.mark.unit
async def test_openai_no_generation():
"""Test OpenAI provider with no generation model raises error."""
provider = OpenAIProvider(
api_key="test-key",
embedding_model="text-embedding-3-small",
generation_model=None,
)
assert provider.supports_generation is False
with pytest.raises(NotImplementedError, match="no generation_model configured"):
await provider.generate("test")
@pytest.mark.unit
async def test_openai_known_dimension():
"""Test dimension detection for known OpenAI models."""
provider = OpenAIProvider(
api_key="test-key",
embedding_model="text-embedding-3-small",
)
# Known model should have dimension set from lookup table
assert provider.get_dimension() == 1536
@pytest.mark.unit
async def test_openai_unknown_dimension_detected(mock_openai_client):
"""Test dimension detection for unknown model via API call."""
# Mock response with specific dimension
mock_embedding_data = MagicMock()
mock_embedding_data.embedding = [0.1] * 768
mock_embedding_data.index = 0
mock_response = MagicMock()
mock_response.data = [mock_embedding_data]
mock_openai_client.embeddings.create = AsyncMock(return_value=mock_response)
provider = OpenAIProvider(
api_key="test-key",
embedding_model="custom-embedding-model",
)
# Dimension not known yet for custom model
with pytest.raises(RuntimeError, match="not detected yet"):
provider.get_dimension()
# Detect dimension via embed call
await provider.embed("test")
# Now dimension should be available
assert provider.get_dimension() == 768
@pytest.mark.unit
async def test_openai_github_models_api(mock_openai_client):
"""Test OpenAI provider with GitHub Models API configuration."""
# Mock response
mock_embedding_data = MagicMock()
mock_embedding_data.embedding = [0.1, 0.2, 0.3]
mock_embedding_data.index = 0
mock_response = MagicMock()
mock_response.data = [mock_embedding_data]
mock_openai_client.embeddings.create = AsyncMock(return_value=mock_response)
# Create provider with GitHub Models configuration
provider = OpenAIProvider(
api_key="ghp_test_token",
base_url="https://models.github.ai/inference",
embedding_model="openai/text-embedding-3-small",
generation_model=None,
)
# Known dimension for GitHub Models prefixed model
assert (
provider.get_dimension()
== OPENAI_EMBEDDING_DIMENSIONS["openai/text-embedding-3-small"]
)
# Test embedding
embedding = await provider.embed("test text")
assert embedding == [0.1, 0.2, 0.3]
@pytest.mark.unit
async def test_openai_empty_batch():
"""Test OpenAI batch embedding with empty list."""
provider = OpenAIProvider(
api_key="test-key",
embedding_model="text-embedding-3-small",
)
embeddings = await provider.embed_batch([])
assert embeddings == []
@pytest.mark.unit
async def test_openai_close(mock_openai_client):
"""Test OpenAI client close."""
provider = OpenAIProvider(
api_key="test-key",
embedding_model="text-embedding-3-small",
)
await provider.close()
mock_openai_client.close.assert_called_once()