# nextcloud-mcp-server/tests/integration/test_sampling.py

"""Integration tests for MCP sampling with semantic search.

These tests validate the nc_notes_semantic_search_answer tool which combines:
1. Semantic search to retrieve relevant documents
2. MCP sampling to generate natural language answers

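Tests cover these scenarios:
- Successful sampling (LLM generates answer), or the sampling fallback when
  the client doesn't support sampling (both handled by a single test)
- No results (no relevant documents found)
- The limit, score_threshold and max_answer_tokens parameters
- Vector sync disabled (placeholder, currently always skipped)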

Note: These tests require VECTOR_SYNC_ENABLED=true and a configured
vector database with indexed test data.
"""

from unittest.mock import MagicMock

import pytest
from mcp.types import CreateMessageResult, TextContent

pytestmark = pytest.mark.integration


@pytest.fixture
def mock_sampling_result():
    """Provide a stand-in for a successful sampling result from an MCP client.

    The returned object is a ``MagicMock`` spec'd against
    ``CreateMessageResult`` whose ``content`` is a text block and whose
    ``model`` and ``stopReason`` attributes are populated with fixed values.
    """
    answer_text = (
        "Based on Document 1 (Python Async Programming) and Document 2 "
        "(Best Practices), you should use async/await for asynchronous "
        "programming and always use async context managers for resources."
    )

    sampled = MagicMock(spec=CreateMessageResult)
    sampled.content = TextContent(type="text", text=answer_text)
    sampled.model = "claude-3-5-sonnet"
    sampled.stopReason = "endTurn"
    return sampled


@pytest.mark.asyncio
async def test_semantic_search_answer_successful_sampling(
    nc_mcp_client, temporary_note, mock_sampling_result
):
    """Exercise the answer tool end to end, accepting success or fallback.

    Prerequisites:
    - VECTOR_SYNC_ENABLED=true
    - Qdrant running and indexed
    - Test note indexed in vector database

    Flow:
    1. Create a note about Python async programming
    2. Pause briefly so background indexing can pick it up
    3. Call nc_notes_semantic_search_answer
    4. Check the response keys, then branch on whether the answer is the
       "[Sampling unavailable" fallback or a generated answer

    Note: the ``mock_sampling_result`` fixture is requested but is not
    referenced in the body; no patching of ctx.session.create_message
    happens here, so the outcome depends on the test client.
    """
    import asyncio

    note_body = """# Python Async Programming

## Key Concepts
- Use async def for coroutines
- Use await for async operations
- asyncio.gather() for parallel execution

## Best Practices
Always use async context managers for resources.
Avoid blocking operations in async code."""

    # The created note is not inspected; it only needs to exist for search.
    _created = await temporary_note(
        title="Python Async Guide",
        content=note_body,
        category="Development",
    )

    # Give background vector sync a moment to index the new note.
    await asyncio.sleep(2)

    # Mocking the sampling call would require monkey-patching
    # ctx.session.create_message; with a real MCP Inspector client this
    # would be actual sampling.
    tool_args = {
        "query": "How do I use async in Python?",
        "limit": 5,
        "score_threshold": 0.5,
    }
    result = await nc_mcp_client.call_tool(
        "nc_notes_semantic_search_answer",
        arguments=tool_args,
    )

    # Response structure: every expected key must be present.
    assert result is not None
    expected_keys = (
        "query",
        "generated_answer",
        "sources",
        "total_found",
        "search_method",
    )
    for key in expected_keys:
        assert key in result

    # Sampling might fail (no real LLM client), so either outcome is valid.
    sampling_succeeded = "[Sampling unavailable" not in result["generated_answer"]
    if sampling_succeeded:
        assert result["search_method"] == "semantic_sampling"
        assert "async" in result["generated_answer"].lower()
        assert len(result["sources"]) > 0
        assert result["model_used"] is not None
        return

    # Fallback mode: sources must still be returned before we skip.
    assert result["search_method"] == "semantic_sampling_fallback"
    assert len(result["sources"]) > 0
    pytest.skip("Sampling not supported by test client (expected fallback)")


@pytest.mark.asyncio
async def test_semantic_search_answer_no_results(nc_mcp_client):
    """Check the response shape when the query matches no documents.

    Flow:
    1. Query for a topic expected to be absent from the indexed notes
    2. Assert zero results and an empty source list
    3. Assert the answer carries the "No relevant documents" message and
       that no model or stop reason is reported (i.e. no sampling occurred)
    """
    tool_args = {
        "query": "quantum chromodynamics lattice QCD gluon propagator",
        "limit": 5,
        "score_threshold": 0.7,
    }
    result = await nc_mcp_client.call_tool(
        "nc_notes_semantic_search_answer",
        arguments=tool_args,
    )

    assert result is not None

    # Nothing found, so nothing to cite.
    assert result["total_found"] == 0
    sources = result["sources"]
    assert len(sources) == 0

    # The tool reports the "no documents found" message itself.
    answer = result["generated_answer"]
    assert "No relevant documents" in answer
    assert result["search_method"] == "semantic_sampling"

    # With no sampling call there is no model and no stop reason.
    assert result["model_used"] is None
    assert result["stop_reason"] is None


@pytest.mark.asyncio
async def test_semantic_search_answer_with_limit(nc_mcp_client, temporary_note):
    """Check that the number of returned sources never exceeds ``limit``.

    Flow:
    1. Create three related notes, one after another
    2. Pause briefly so background indexing can pick them up
    3. Query with limit=2
    4. Assert at most 2 sources come back
    """
    import asyncio

    related_notes = (
        ("Python Async Part 1", "Use async/await for asynchronous operations"),
        ("Python Async Part 2", "Use asyncio.gather() for parallel execution"),
        ("Python Async Part 3", "Always use async context managers"),
    )
    # Notes are created sequentially, in the order listed above.
    for note_title, note_content in related_notes:
        await temporary_note(
            title=note_title,
            content=note_content,
            category="Development",
        )

    # Give background vector sync a moment to index the new notes.
    await asyncio.sleep(2)

    tool_args = {
        "query": "async programming in Python",
        "limit": 2,
        "score_threshold": 0.5,
    }
    result = await nc_mcp_client.call_tool(
        "nc_notes_semantic_search_answer",
        arguments=tool_args,
    )

    # The limit is an upper bound; fewer sources are acceptable.
    source_count = len(result["sources"])
    assert source_count <= 2


@pytest.mark.asyncio
async def test_semantic_search_answer_score_threshold(nc_mcp_client, temporary_note):
    """Check that the tool accepts a high ``score_threshold``.

    Flow:
    1. Create a note with distinctive content
    2. Pause briefly so background indexing can pick it up
    3. Query with score_threshold=0.9
    4. Assert the threshold is not echoed in the response and, if anything
       was found, that every source carries a "score" entry

    Actual score values depend on the embedding model, so they are not
    compared against the threshold here.
    """
    import asyncio

    _created = await temporary_note(
        title="Exact Match Test",
        content="This is a very specific test document about widget manufacturing",
        category="Test",
    )

    # Give background vector sync a moment to index the new note.
    await asyncio.sleep(2)

    # The query repeats words from the note, so a high score is plausible.
    tool_args = {
        "query": "widget manufacturing",
        "limit": 5,
        "score_threshold": 0.9,
    }
    result = await nc_mcp_client.call_tool(
        "nc_notes_semantic_search_answer",
        arguments=tool_args,
    )

    # The threshold is an input only; it is not exposed in the response.
    assert "score_threshold" not in result

    if not result["total_found"] > 0:
        return

    # Every returned source must report its score.
    for source in result["sources"]:
        assert "score" in source


@pytest.mark.asyncio
async def test_semantic_search_answer_max_tokens(nc_mcp_client, temporary_note):
    """Check that the tool accepts the ``max_answer_tokens`` parameter.

    Flow:
    1. Create a note with repetitive, lengthy content
    2. Pause briefly so background indexing can pick it up
    3. Call the tool with max_answer_tokens=100
    4. Assert a response with a "generated_answer" key comes back

    Note: Token limiting is enforced by the MCP client's LLM, not the server,
    so this test only verifies the call succeeds with the parameter set.
    """
    import asyncio

    long_body = "This is a document with lots of content. " * 50
    _created = await temporary_note(
        title="Long Document",
        content=long_body,
        category="Test",
    )

    # Give background vector sync a moment to index the new note.
    await asyncio.sleep(2)

    tool_args = {
        "query": "document content",
        "limit": 5,
        "score_threshold": 0.5,
        "max_answer_tokens": 100,
    }
    result = await nc_mcp_client.call_tool(
        "nc_notes_semantic_search_answer",
        arguments=tool_args,
    )

    # The call must not error, even if sampling itself fails.
    assert result is not None
    assert "generated_answer" in result


@pytest.mark.asyncio
async def test_semantic_search_answer_requires_vector_sync():
    """Placeholder for the VECTOR_SYNC_ENABLED=false failure case.

    The intent is to validate that the tool checks for vector sync being
    enabled. Doing so needs a separate test client configured with
    VECTOR_SYNC_ENABLED=false, which may not be available in the current
    test environment, so this test is skipped unconditionally.
    """
    skip_reason = (
        "Requires test environment with VECTOR_SYNC_ENABLED=false, "
        "which would break other semantic search tests"
    )
    pytest.skip(skip_reason)