# nextcloud-mcp-server/nextcloud_mcp_server/models/semantic.py

"""Pydantic models for semantic search responses."""

from typing import List, Optional

from pydantic import BaseModel, Field

from .base import BaseResponse


class SemanticSearchResult(BaseModel):
    """One semantic search hit: a matched chunk plus its document metadata.

    Describes the document the chunk belongs to, where the chunk sits inside
    that document, how relevant it is, and (optionally) the text surrounding
    it. The context-expansion fields keep their unset defaults (``False`` /
    ``None``) unless context expansion was performed.
    """

    # --- Document identity ---
    id: int = Field(description="Document ID (int for all document types)")
    doc_type: str = Field(
        description="Document type (note, calendar_event, deck_card, etc.)",
    )
    title: str = Field(description="Document title")
    category: str = Field(
        description="Document category (notes) or location (calendar)",
        default="",
    )

    # --- Match details ---
    excerpt: str = Field(description="Excerpt from matching chunk")
    score: float = Field(
        description=(
            "Relevance score (≥ 0.0, higher is better). Score range depends on "
            "fusion method: RRF produces scores in [0.0, 1.0], DBSF can exceed 1.0 "
            "(sum of normalized scores from multiple systems)"
        ),
    )

    # --- Chunk position within the document ---
    chunk_index: int = Field(description="Index of matching chunk in document")
    total_chunks: int = Field(description="Total number of chunks in document")
    chunk_start_offset: Optional[int] = Field(
        description="Character position where chunk starts in document",
        default=None,
    )
    chunk_end_offset: Optional[int] = Field(
        description="Character position where chunk ends in document",
        default=None,
    )

    # --- PDF paging information ---
    page_number: Optional[int] = Field(
        description="Page number for PDF documents",
        default=None,
    )
    page_count: Optional[int] = Field(
        description="Total number of pages in PDF document",
        default=None,
    )

    # --- Context expansion (optional, populated when include_context=True) ---
    has_context_expansion: bool = Field(
        description="Whether context expansion was performed",
        default=False,
    )
    marked_text: Optional[str] = Field(
        description="Full text with position markers around matched chunk",
        default=None,
    )
    before_context: Optional[str] = Field(
        description="Text before the matched chunk",
        default=None,
    )
    after_context: Optional[str] = Field(
        description="Text after the matched chunk",
        default=None,
    )
    has_before_truncation: Optional[bool] = Field(
        description="Whether before_context was truncated",
        default=None,
    )
    has_after_truncation: Optional[bool] = Field(
        description="Whether after_context was truncated",
        default=None,
    )


class SemanticSearchResponse(BaseResponse):
    """Semantic search response spanning all indexed Nextcloud apps.

    Bundles the matching results with the query that produced them, the
    number of documents found, and the search method that was used.
    """

    results: List[SemanticSearchResult] = Field(
        description="Semantic search results with similarity scores",
    )
    query: str = Field(description="The search query used")
    total_found: int = Field(description="Total number of documents found")
    # Defaults to plain semantic search when the caller does not say otherwise.
    search_method: str = Field(
        description="Search method used (semantic or hybrid)",
        default="semantic",
    )


class SamplingSearchResponse(BaseResponse):
    """Semantic search response that also carries an LLM-generated answer.

    The answer is produced by the MCP client's LLM via sampling, and the
    source documents that served as context are returned next to it. Readers
    can use the answer for quick information and consult the sources for
    verification and deeper exploration.

    Attributes:
        query: The original user query
        generated_answer: Natural language answer generated by client's LLM
        sources: List of semantic search results used as context
            (empty list when none are supplied)
        total_found: Total number of matching documents found
        search_method: Search method label; defaults to "semantic_sampling"
        model_used: Name of model that generated the answer
            (e.g., "claude-3-5-sonnet"); None when not provided
        stop_reason: Why generation stopped ("endTurn", "maxTokens", etc.);
            None when not provided
    """

    # Required fields: no default is supplied, so callers must provide them.
    query: str = Field(description="Original user query")
    generated_answer: str = Field(
        description="LLM-generated answer based on retrieved documents",
    )
    # default_factory gives every instance its own fresh list.
    sources: List[SemanticSearchResult] = Field(
        description="Source documents with excerpts and relevance scores",
        default_factory=list,
    )
    total_found: int = Field(description="Total matching documents")
    search_method: str = Field(
        description="Search method used",
        default="semantic_sampling",
    )
    model_used: Optional[str] = Field(
        description="Model that generated the answer",
        default=None,
    )
    stop_reason: Optional[str] = Field(
        description="Reason generation stopped",
        default=None,
    )


class VectorSyncStatusResponse(BaseResponse):
    """Response for vector sync status.

    Provides information about the current state of vector sync,
    including how many documents are indexed and how many are pending.
    Every field has a default, and taken together the defaults describe
    the "sync disabled" state (zero counts, status "disabled", not enabled).

    Attributes:
        indexed_count: Number of documents in Qdrant vector database
            (defaults to 0)
        pending_count: Number of documents in processing queue
            (defaults to 0)
        status: Current sync status ("idle", "syncing", or "disabled");
            defaults to "disabled"
        enabled: Whether vector sync is enabled (defaults to False)
    """

    indexed_count: int = Field(
        default=0, description="Number of documents indexed in vector database"
    )
    pending_count: int = Field(
        default=0, description="Number of documents pending processing"
    )
    # "disabled" is a valid status and the default, alongside "idle"/"syncing".
    status: str = Field(
        default="disabled",
        description='Sync status: "idle", "syncing", or "disabled"',
    )
    enabled: bool = Field(default=False, description="Whether vector sync is enabled")


# Public API of this module: the names exported by a star-import.
__all__ = [
    "SemanticSearchResult",
    "SemanticSearchResponse",
    "SamplingSearchResponse",
    "VectorSyncStatusResponse",
]