4b026e9aa0
This implements ADR-009, which documents the decision to use a generic
`semantic:read` OAuth scope instead of requiring all app-specific scopes
for semantic search functionality.
Changes:
- Created new `nextcloud_mcp_server/models/semantic.py` with semantic search models
  - SemanticSearchResult (with new doc_type field for multi-app support)
  - SemanticSearchResponse
  - SamplingSearchResponse
  - VectorSyncStatusResponse
- Created new `nextcloud_mcp_server/server/semantic.py` with semantic search tools
  - nc_semantic_search (renamed from nc_notes_semantic_search)
  - nc_semantic_search_answer (renamed from nc_notes_semantic_search_answer)
  - nc_get_vector_sync_status (renamed from nc_notes_get_vector_sync_status)
  - All tools now use @require_scopes("semantic:read") instead of "notes:read"
- Updated `nextcloud_mcp_server/server/notes.py`
  - Removed semantic search tools (moved to semantic.py)
  - Removed semantic search model imports
  - Removed unused MCP imports (ModelHint, ModelPreferences, etc.)
- Updated `nextcloud_mcp_server/models/notes.py`
  - Removed semantic search models (moved to semantic.py)
- Updated `nextcloud_mcp_server/app.py`
  - Import configure_semantic_tools
  - Register semantic tools when VECTOR_SYNC_ENABLED=true
- Updated `nextcloud_mcp_server/server/__init__.py`
  - Export configure_semantic_tools
- Updated tests
  - tests/integration/test_sampling.py: Use new tool names
  - tests/unit/test_response_models.py: Import from semantic.py, add doc_type field
Architecture:
- Semantic search is now a cross-app feature, not tied to Notes
- Uses dual-phase authorization: semantic:read scope + per-document verification
- Supports future multi-app indexing (notes, calendar, deck, files, contacts)
Test results:
- All 69 unit tests passing
- All 5 smoke tests passing
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
110 lines
4.0 KiB
Python
110 lines
4.0 KiB
Python
"""Pydantic models for semantic search responses."""
|
|
|
|
from typing import List, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
from .base import BaseResponse
|
|
|
|
|
|
class SemanticSearchResult(BaseModel):
    """Model for semantic search results with additional metadata."""

    # Identity of the matched document: its ID plus the kind of document the
    # ID belongs to. Both are required.
    id: int = Field(..., description="Document ID")
    doc_type: str = Field(
        ..., description="Document type (note, calendar_event, deck_card, etc.)"
    )

    # Human-readable context. `category` is the only field in this model with
    # a default (empty string); everything else must be supplied.
    title: str = Field(..., description="Document title")
    category: str = Field(
        description="Document category (notes) or location (calendar)", default=""
    )

    # Match details: the excerpt and score of the matching chunk, and where
    # that chunk sits among the document's chunks.
    excerpt: str = Field(..., description="Excerpt from matching chunk")
    score: float = Field(..., description="Semantic similarity score (0-1)")
    chunk_index: int = Field(..., description="Index of matching chunk in document")
    total_chunks: int = Field(..., description="Total number of chunks in document")
|
|
|
|
|
|
class SemanticSearchResponse(BaseResponse):
    """Response model for semantic search across all indexed Nextcloud apps."""

    # Required payload: the hits, the query that produced them, and the count
    # of documents found.
    results: List[SemanticSearchResult] = Field(
        ..., description="Semantic search results with similarity scores"
    )
    query: str = Field(..., description="The search query used")
    total_found: int = Field(..., description="Total number of documents found")

    # Optional label; falls back to "semantic" when not provided.
    search_method: str = Field(
        description="Search method used (semantic or hybrid)", default="semantic"
    )
|
|
|
|
|
|
class SamplingSearchResponse(BaseResponse):
    """Response from semantic search with LLM-generated answer via MCP sampling.

    This response includes both a generated natural language answer (created by
    the MCP client's LLM via sampling) and the source documents used to generate
    that answer. Users can read the answer for quick information and review
    sources for verification and deeper exploration.

    Attributes:
        query: The original user query
        generated_answer: Natural language answer generated by client's LLM
        sources: List of semantic search results used as context
        total_found: Total number of matching documents found
        search_method: Always "semantic_sampling" for this response type
        model_used: Name of model that generated the answer (e.g., "claude-3-5-sonnet")
        stop_reason: Why generation stopped ("endTurn", "maxTokens", etc.)
    """

    # Required: no default is supplied for the query or the answer.
    query: str = Field(description="Original user query")
    generated_answer: str = Field(
        description="LLM-generated answer based on retrieved documents"
    )

    # Optional: `default_factory=list` gives each instance its own empty list
    # when no sources are passed.
    sources: List[SemanticSearchResult] = Field(
        description="Source documents with excerpts and relevance scores",
        default_factory=list,
    )

    # `total_found` is required; `search_method` defaults to the label for
    # this response type.
    total_found: int = Field(description="Total matching documents")
    search_method: str = Field(
        description="Search method used", default="semantic_sampling"
    )

    # Generation metadata; both fields default to None when not provided.
    model_used: Optional[str] = Field(
        description="Model that generated the answer", default=None
    )
    stop_reason: Optional[str] = Field(
        description="Reason generation stopped", default=None
    )
|
|
|
|
|
|
class VectorSyncStatusResponse(BaseResponse):
    """Response for vector sync status.

    Provides information about the current state of vector sync,
    including how many documents are indexed and how many are pending.

    Every field has a default, so an instance built with no arguments
    reports zero counts, status "disabled", and enabled=False.

    Attributes:
        indexed_count: Number of documents in Qdrant vector database
        pending_count: Number of documents in processing queue
        status: Current sync status ("idle", "syncing", or "disabled")
        enabled: Whether vector sync is enabled
    """

    # Counters default to 0.
    indexed_count: int = Field(
        default=0, description="Number of documents indexed in vector database"
    )
    pending_count: int = Field(
        default=0, description="Number of documents pending processing"
    )

    # The default status is "disabled", matching the default enabled=False.
    status: str = Field(
        default="disabled",
        description='Sync status: "idle", "syncing", or "disabled"',
    )
    enabled: bool = Field(default=False, description="Whether vector sync is enabled")
|
|
|
|
|
|
# Names exported by `from <this module> import *`: the four response models
# defined in this module.
__all__ = [
    "SemanticSearchResult",
    "SemanticSearchResponse",
    "SamplingSearchResponse",
    "VectorSyncStatusResponse",
]
|