From e575c8e57bf1247525dd41ad88f8a3870d575294 Mon Sep 17 00:00:00 2001 From: Chris Coutinho Date: Mon, 10 Nov 2025 01:18:30 +0100 Subject: [PATCH] feat(vector): Support multiple embedding models with auto-generated collection names MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR enables safe switching between embedding models and multi-server deployments by implementing auto-generated Qdrant collection names based on deployment ID and model name. ## Problem Previously, all deployments used a single hardcoded collection name "nextcloud_content", which caused two critical issues: 1. **Dimension mismatches when switching models**: Changing OLLAMA_EMBEDDING_MODEL (e.g., nomic-embed-text at 768D → all-minilm at 384D) would cause runtime errors as vectors couldn't be inserted into a collection with incompatible dimensions. 2. **Collection collisions in multi-server setups**: Multiple MCP servers sharing a single Qdrant instance would overwrite each other's data, making horizontal scaling impossible. ## Solution ### Auto-Generated Collection Naming Collections are now automatically named using the pattern: \`{deployment-id}-{model-name}\` **Deployment ID**: Uses \`OTEL_SERVICE_NAME\` if configured (and not default value), otherwise falls back to \`hostname\` for simple Docker deployments. **Model Name**: From \`OLLAMA_EMBEDDING_MODEL\` with path separators sanitized. **Examples**: - \`my-mcp-server-nomic-embed-text\` (with OTEL_SERVICE_NAME=my-mcp-server) - \`mcp-container-all-minilm\` (simple Docker, hostname=mcp-container) **Override**: Users can still set \`QDRANT_COLLECTION\` explicitly to bypass auto-generation for backward compatibility. ### Dimension Validation Added startup validation that checks collection dimensions match the embedding service. 
If a mismatch is detected, the server fails fast with a clear error message explaining: - Expected vs actual dimensions - Likely cause (model change) - Solutions (delete collection, use different name, or revert model) ### Improved Sampling Error Handling Enhanced MCP sampling rejection handling to treat user rejections as normal behavior rather than errors: - **User rejections** ("rejected", "denied") → INFO log, no traceback - **Unsupported clients** → INFO log, no traceback - **Other MCP errors** → WARNING log, no traceback - **Unexpected errors** → ERROR log WITH traceback This aligns with the MCP specification where clients SHOULD prompt users for approval/denial of sampling requests. ## Changes ### Core Implementation - **nextcloud_mcp_server/config.py**: Added \`get_collection_name()\` method with deployment ID detection and model name sanitization - **nextcloud_mcp_server/vector/qdrant_client.py**: Dimension validation on collection open with helpful error messages - **nextcloud_mcp_server/vector/{scanner,processor}.py**: Updated to use \`get_collection_name()\` - **nextcloud_mcp_server/auth/userinfo_routes.py**: Vector sync status uses \`get_collection_name()\` - **nextcloud_mcp_server/server/semantic.py**: - Updated semantic search tools to use \`get_collection_name()\` - Improved sampling rejection error handling (McpError vs Exception) ### Documentation - **docs/semantic-search-architecture.md**: New comprehensive architecture document (874 lines) covering background sync, semantic search flow, RAG implementation, and deployment modes - **docs/configuration.md**: Added detailed "Qdrant Collection Naming" section with examples and multi-server deployment guidance - **docker-compose.yml**: Added comments explaining collection naming behavior - **README.md**: Updated semantic search descriptions to clarify experimental status, Notes-only support, and infrastructure requirements ## Migration Guide **For existing single-server deployments:** Option 1 
(Recommended): Use explicit collection name for continuity \`\`\`bash QDRANT_COLLECTION=nextcloud_content # Keep existing collection \`\`\` Option 2: Allow auto-generation and re-embed \`\`\`bash # Remove QDRANT_COLLECTION override # New collection will be created based on deployment ID + model # Requires re-embedding all documents (may take time) \`\`\` **For new multi-server deployments:** Set unique OTEL service names per server: \`\`\`bash # Server 1 OTEL_SERVICE_NAME=mcp-prod OLLAMA_EMBEDDING_MODEL=nomic-embed-text # → Collection: "mcp-prod-nomic-embed-text" # Server 2 OTEL_SERVICE_NAME=mcp-staging OLLAMA_EMBEDDING_MODEL=nomic-embed-text # → Collection: "mcp-staging-nomic-embed-text" \`\`\` ## Benefits ✅ **Safe model switching**: Each model gets its own collection, preventing dimension mismatch errors ✅ **Multi-server support**: Multiple MCP servers can share one Qdrant instance without conflicts ✅ **Clear ownership**: Collection names show which deployment and model owns the data ✅ **Better error messages**: Dimension validation provides actionable guidance ✅ **Backward compatible**: Existing deployments can continue using \`QDRANT_COLLECTION\` override ## Testing Validated with: - Single-server deployments (default hostname-based naming) - Multi-server deployments (OTEL service name-based naming) - Model switching scenarios (dimension validation) - Collection override scenarios (backward compatibility) Next steps: Testing various Ollama embedding models to investigate optimal chunk sizes and performance characteristics. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- README.md | 394 +++------ docker-compose.yml | 21 +- docs/configuration.md | 105 +++ docs/semantic-search-architecture.md | 874 +++++++++++++++++++ nextcloud_mcp_server/auth/userinfo_routes.py | 17 +- nextcloud_mcp_server/config.py | 40 + nextcloud_mcp_server/server/semantic.py | 196 ++++- nextcloud_mcp_server/vector/processor.py | 4 +- nextcloud_mcp_server/vector/qdrant_client.py | 45 +- nextcloud_mcp_server/vector/scanner.py | 4 +- 10 files changed, 1361 insertions(+), 339 deletions(-) create mode 100644 docs/semantic-search-architecture.md diff --git a/README.md b/README.md index aa4077f..d3c632e 100644 --- a/README.md +++ b/README.md @@ -2,286 +2,134 @@ [![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server) -**Enable AI assistants to interact with your Nextcloud instance.** +**A production-ready MCP server that connects AI assistants to your Nextcloud instance.** -The Nextcloud MCP (Model Context Protocol) server allows Large Language Models like Claude, GPT, and Gemini to interact with your Nextcloud data through a secure API. Create notes, manage calendars, organize contacts, work with files, and more - all through natural language. +Enable Large Language Models like Claude, GPT, and Gemini to interact with your Nextcloud data through a secure API. Create notes, manage calendars, organize contacts, work with files, and more - all through natural language conversations. + +This is a **dedicated standalone MCP server** designed for external MCP clients like Claude Code and IDEs. It runs independently of Nextcloud (Docker, VM, Kubernetes, or local) and provides deep CRUD operations across Nextcloud apps. 
> [!NOTE] -> **Nextcloud has two ways to enable AI access:** Nextcloud provides [Context Agent](https://github.com/nextcloud/context_agent), an AI agent backend that powers the [Assistant](https://github.com/nextcloud/assistant) app and allows AI to interact with Nextcloud apps like Calendar, Talk, and Contacts. Context Agent runs as an ExApp inside Nextcloud and also _[exposes an MCP server](https://docs.nextcloud.com/server/stable/admin_manual/ai/app_context_agent.html#using-nextcloud-mcp-server)_ for external MCP clients. -> -> This project (Nextcloud MCP Server) is a **dedicated standalone MCP server** designed specifically for external MCP clients like Claude Code and IDEs, with deep CRUD operations and OAuth support. It does not require any additional AI-features to be enabled in Nextcloud beyond the apps that you intend to interact with. - -### High-level Comparison: Nextcloud MCP Server vs. Nextcloud AI Stack - -| Aspect | **Nextcloud MCP Server**
(This Project) | **Nextcloud AI Stack**
(Assistant + Context Agent) | -|--------|---------------------------------------------|--------------------------------------------------------| -| **Purpose** | External MCP client access to Nextcloud | AI assistance within Nextcloud UI | -| **Deployment** | Standalone (Docker, VM, K8s) | Inside Nextcloud (ExApp via AppAPI) | -| **Primary Users** | Claude Code, IDEs, external developers | Nextcloud end users via Assistant app | -| **Authentication** | OAuth2/OIDC or Basic Auth | Session-based (integrated) | -| **Notes Support** | ✅ Full CRUD + keyword search (7 tools) | ❌ Not implemented | -| **Semantic Search** | ✅ Multi-app vector search (2+ tools) | ❌ Not implemented | -| **Calendar** | ✅ Full CalDAV + tasks (20+ tools) | ✅ Events, free/busy, tasks (4 tools) | -| **Contacts** | ✅ Full CardDAV (8 tools) | ✅ Find person, current user (2 tools) | -| **Files (WebDAV)** | ✅ Full filesystem access (12 tools) | ✅ Read, folder tree, sharing (3 tools) | -| **Document Processing** | ✅ OCR with progress (PDF, DOCX, images) | ❌ Not implemented | -| **Deck** | ✅ Full project management (15 tools) | ✅ Basic board/card ops (2 tools) | -| **Tables** | ✅ Row operations (5 tools) | ❌ Not implemented | -| **Cookbook** | ✅ Full recipe management (13 tools) | ❌ Not implemented | -| **Talk** | ❌ Not implemented | ✅ Messages, conversations (4 tools) | -| **Mail** | ❌ Not implemented | ✅ Send email (2 tools) | -| **AI Features** | ❌ Not implemented | ✅ Image gen, transcription, doc gen (4 tools) | -| **Web/Maps** | ❌ Not implemented | ✅ Search, weather, transit (5 tools) | -| **MCP Resources** | ✅ Structured data URIs | ❌ Not supported | -| **External MCP** | ❌ Pure server | ✅ Consumes external MCP servers | -| **Safety Model** | Client-controlled | Built-in safe/dangerous distinction | -| **Best For** | • Deep CRUD operations
• External integrations
• OAuth security
• IDE/editor integration | • AI-driven actions in Nextcloud UI
• Multi-service orchestration
• User task automation
• MCP aggregation hub | - -See our [detailed comparison](docs/comparison-context-agent.md) for architecture diagrams, workflow examples, and guidance on when to use each approach. - -Want to see another Nextcloud app supported? [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) or contribute a pull request! - -### Authentication - -| Mode | Security | Best For | -|------|----------|----------| -| **OAuth2/OIDC** ⚠️ **Experimental** | 🔒 High | Testing, evaluation (requires patch for app-specific APIs) | -| **Basic Auth** ✅ | Lower | Development, testing, production | - -> [!IMPORTANT] -> **OAuth is experimental** and requires a manual patch to the `user_oidc` app for full functionality: -> - **Required patch**: `user_oidc` app needs modifications for Bearer token support ([issue #1221](https://github.com/nextcloud/user_oidc/issues/1221)) -> - **Impact**: Without the patch, most app-specific APIs (Notes, Calendar, Contacts, Deck, etc.) will fail with 401 errors -> - **What works without patches**: OAuth flow, PKCE support (with `oidc` v1.10.0+), OCS APIs -> - **Production use**: Wait for upstream patch to be merged into official releases -> -> See [OAuth Upstream Status](docs/oauth-upstream-status.md) for detailed information on required patches and workarounds. - -OAuth2/OIDC provides secure, per-user authentication with access tokens. See [Authentication Guide](docs/authentication.md) for details. +> **Looking for AI features inside Nextcloud?** Nextcloud also provides [Context Agent](https://github.com/nextcloud/context_agent), which powers the Assistant app and runs as an ExApp inside Nextcloud. See [docs/comparison-context-agent.md](docs/comparison-context-agent.md) for a detailed comparison of use cases. ## Quick Start -### 1. 
Install +Get up and running in 60 seconds using Docker: ```bash -# Clone the repository -git clone https://github.com/cbcoutinho/nextcloud-mcp-server.git -cd nextcloud-mcp-server - -# Install with uv (recommended) -uv sync - -# Or using Docker -docker pull ghcr.io/cbcoutinho/nextcloud-mcp-server:latest - -# Or deploy to Kubernetes with Helm -helm repo add nextcloud-mcp https://cbcoutinho.github.io/nextcloud-mcp-server -helm repo update -helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \ - --set nextcloud.host=https://cloud.example.com \ - --set auth.basic.username=myuser \ - --set auth.basic.password=mypassword -``` - -See [Installation Guide](docs/installation.md) for detailed instructions, or [Helm Chart README](charts/nextcloud-mcp-server/README.md) for Kubernetes deployment. - -### 2. Configure - -Create a `.env` file: - -```bash -# Copy the sample -cp env.sample .env -``` - -**For Basic Auth (recommended for most users):** -```dotenv +# 1. Create a minimal configuration +cat > .env << EOF NEXTCLOUD_HOST=https://your.nextcloud.instance.com NEXTCLOUD_USERNAME=your_username NEXTCLOUD_PASSWORD=your_app_password -``` +EOF -**For OAuth (experimental - requires patches):** -```dotenv -NEXTCLOUD_HOST=https://your.nextcloud.instance.com -``` - -See [Configuration Guide](docs/configuration.md) for all options. - -### 3. Set Up Authentication - -**Basic Auth Setup (recommended):** -1. Create an app password in Nextcloud (Settings → Security → Devices & sessions) -2. Add credentials to `.env` file -3. Start the server - -**OAuth Setup (experimental):** -1. Install Nextcloud OIDC apps (`oidc` v1.10.0+ + `user_oidc`) -2. **Apply required patch** to `user_oidc` app for Bearer token support (see [OAuth Upstream Status](docs/oauth-upstream-status.md)) -3. Enable dynamic client registration or create an OIDC client with id & secret -4. Configure Bearer token validation in `user_oidc` -5. 
Start the server - -See [OAuth Quick Start](docs/quickstart-oauth.md) for 5-minute setup or [OAuth Setup Guide](docs/oauth-setup.md) for detailed instructions. - -### 4. Run the Server - -```bash -# Load environment variables -export $(grep -v '^#' .env | xargs) - -# Start with Basic Auth (default) -uv run nextcloud-mcp-server - -# Or start with OAuth (experimental - requires patches) -uv run nextcloud-mcp-server --oauth - -# Or with Docker +# 2. Start the server docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \ ghcr.io/cbcoutinho/nextcloud-mcp-server:latest + +# 3. Test the connection +curl http://127.0.0.1:8000/health ``` -The server starts on `http://127.0.0.1:8000` by default. +**Next Steps:** +- Create an app password in Nextcloud: Settings → Security → Devices & sessions +- Connect your MCP client (Claude Desktop, IDEs, `mcp dev`, etc.) +- See [docs/installation.md](docs/installation.md) for other deployment options (local, Kubernetes) -See [Running the Server](docs/running.md) for more options. +## Key Features -### 5. 
Connect an MCP Client +- **90+ MCP Tools** - Comprehensive API coverage across 8 Nextcloud apps +- **MCP Resources** - Structured data URIs for browsing Nextcloud data +- **Semantic Search (Experimental)** - Optional vector-powered search for Notes (requires Qdrant + Ollama) +- **Document Processing** - OCR and text extraction from PDFs, DOCX, images with progress notifications +- **Flexible Deployment** - Docker, Kubernetes (Helm), VM, or local installation +- **Production-Ready Auth** - Basic Auth with app passwords (recommended) or OAuth2/OIDC (experimental) +- **Multiple Transports** - SSE, HTTP, and streamable-http support -Test with MCP Inspector: +## Supported Apps -```bash -uv run mcp dev -``` +| App | Tools | Capabilities | +|-----|-------|--------------| +| **Notes** | 7 | Full CRUD, keyword search, semantic search | +| **Calendar** | 20+ | Events, todos (tasks), recurring events, attendees, availability | +| **Contacts** | 8 | Full CardDAV support, address books | +| **Files (WebDAV)** | 12 | Filesystem access, OCR/document processing | +| **Deck** | 15 | Boards, stacks, cards, labels, assignments | +| **Cookbook** | 13 | Recipe management, URL import (schema.org) | +| **Tables** | 5 | Row operations on Nextcloud Tables | +| **Sharing** | 10+ | Create and manage shares | +| **Semantic Search** | 2+ | Vector search for Notes (experimental, opt-in, requires infrastructure) | -Or connect from: -- Claude Desktop -- Any MCP-compatible client +Want to see another Nextcloud app supported? [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) or contribute a pull request! 
+ +## Authentication + +> [!IMPORTANT] +> **OAuth2/OIDC is experimental** and requires a manual patch to the `user_oidc` app: +> - **Required patch**: Bearer token support ([issue #1221](https://github.com/nextcloud/user_oidc/issues/1221)) +> - **Impact**: Without the patch, most app-specific APIs fail with 401 errors +> - **Recommendation**: Use Basic Auth for production until upstream patches are merged +> +> See [docs/oauth-upstream-status.md](docs/oauth-upstream-status.md) for patch status and workarounds. + +**Recommended:** Basic Auth with app-specific passwords provides secure, production-ready authentication. See [docs/authentication.md](docs/authentication.md) for setup details and OAuth configuration. + +### Authentication Modes + +The server supports two authentication modes: + +**Single-User Mode (BasicAuth):** +- One set of credentials shared by all MCP clients +- Simple setup: username + app password in environment variables +- All clients access Nextcloud as the same user +- Best for: Personal use, development, single-user deployments + +**Multi-User Mode (OAuth):** +- Each MCP client authenticates separately with their own Nextcloud account +- Per-user scopes and permissions (clients only see tools they're authorized for) +- More secure: tokens expire, credentials never shared with server +- Best for: Teams, multi-user deployments, production environments with multiple users + +See [docs/authentication.md](docs/authentication.md) for detailed setup instructions. + +## Semantic Search + +The server provides an experimental RAG pipeline for _Semantic Search_, which lets MCP clients find information in Nextcloud based on **meaning** rather than just keywords. Instead of matching "machine learning" only when those exact words appear, it understands that "neural networks," "AI models," and "deep learning" are semantically related concepts. 
+ +**Example:** +- **Keyword search**: Query "car" only finds notes containing "car" +- **Semantic search**: Query "car" also finds notes about "automobile," "vehicle," "sedan," "transportation" + +This enables natural language queries and helps discover related content across your Nextcloud notes. + +> [!NOTE] +> **Semantic Search is experimental and opt-in:** +> - Disabled by default (`VECTOR_SYNC_ENABLED=false`) +> - Currently supports Notes app only (multi-app support planned) +> - Requires additional infrastructure: vector database + embedding service +> - Answer generation (`nc_semantic_search_answer`) requires MCP client sampling support +> +> See [docs/semantic-search-architecture.md](docs/semantic-search-architecture.md) for architecture details and [docs/configuration.md](docs/configuration.md) for setup instructions. ## Documentation ### Getting Started -- **[Installation](docs/installation.md)** - Install the server -- **[Configuration](docs/configuration.md)** - Environment variables and settings -- **[Authentication](docs/authentication.md)** - OAuth vs BasicAuth -- **[Running the Server](docs/running.md)** - Start and manage the server +- **[Installation](docs/installation.md)** - Docker, Kubernetes, local, or VM deployment +- **[Configuration](docs/configuration.md)** - Environment variables and advanced options +- **[Authentication](docs/authentication.md)** - Basic Auth vs OAuth2/OIDC setup +- **[Running the Server](docs/running.md)** - Start, manage, and troubleshoot -### Architecture -- **[Comparison with Context Agent](docs/comparison-context-agent.md)** - How this MCP server differs from Nextcloud's Context Agent +### Features +- **[App Documentation](docs/)** - Notes, Calendar, Contacts, WebDAV, Deck, Cookbook, Tables +- **[Document Processing](docs/configuration.md#document-processing)** - OCR and text extraction setup +- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes only, 
opt-in) -### OAuth Documentation (Experimental) -- **[OAuth Quick Start](docs/quickstart-oauth.md)** - 5-minute setup guide -- **[OAuth Setup Guide](docs/oauth-setup.md)** - Detailed setup instructions -- **[OAuth Architecture](docs/oauth-architecture.md)** - How OAuth works -- **[OAuth Troubleshooting](docs/oauth-troubleshooting.md)** - OAuth-specific issues -- **[Upstream Status](docs/oauth-upstream-status.md)** - **Required patches and PRs** ⚠️ - -### Reference +### Advanced Topics +- **[OAuth Architecture](docs/oauth-architecture.md)** - How OAuth works (experimental) +- **[OAuth Quick Start](docs/quickstart-oauth.md)** - 5-minute OAuth setup +- **[OAuth Setup Guide](docs/oauth-setup.md)** - Detailed OAuth configuration - **[Troubleshooting](docs/troubleshooting.md)** - Common issues and solutions - -### App-Specific Documentation -- [Notes API](docs/notes.md) -- [Calendar (CalDAV)](docs/calendar.md) -- [Contacts (CardDAV)](docs/contacts.md) -- [Cookbook](docs/cookbook.md) -- [Deck](docs/deck.md) -- [Tables](docs/table.md) -- [WebDAV](docs/webdav.md) - -## MCP Tools & Resources - -The server exposes Nextcloud functionality through MCP tools (for actions) and resources (for data browsing). - -### Tools - -The server provides 90+ tools across 8 Nextcloud apps. When using OAuth, tools are dynamically filtered based on your granted scopes. - -For a complete list of all supported OAuth scopes and their descriptions, see [OAuth Scopes Documentation](docs/oauth-architecture.md#oauth-scopes). 
- -#### Available Tool Categories - -| App | Tools | Read Scope | Write Scope | Operations | -|-----|-------|-----------|-------------|------------| -| **Notes** | 7 | `notes:read` | `notes:write` | Create, read, update, delete, search notes (keyword search) | -| **Calendar** | 20+ | `calendar:read` `todo:read` | `calendar:write` `todo:write` | Events, todos (tasks), calendars, recurring events, attendees | -| **Contacts** | 8 | `contacts:read` | `contacts:write` | Create, read, update, delete contacts and address books | -| **Files (WebDAV)** | 12 | `files:read` | `files:write` | List, read, upload, delete, move files; **OCR/document processing** | -| **Deck** | 15 | `deck:read` | `deck:write` | Boards, stacks, cards, labels, assignments | -| **Cookbook** | 13 | `cookbook:read` | `cookbook:write` | Recipes, import from URLs, search, categories | -| **Tables** | 5 | `tables:read` | `tables:write` | Row operations on Nextcloud Tables | -| **Sharing** | 10+ | `sharing:read` | `sharing:write` | Create, manage, delete shares | -| **Semantic Search** | 2+ | `semantic:read` | `semantic:write` | Vector-powered semantic search across **all apps** (notes, calendar, deck, files, contacts), background indexing | - -#### Document Processing (Optional) - -The WebDAV file reading tool (`nc_webdav_read_file`) supports **automatic text extraction** from documents and images: - -**Supported Formats:** -- **Documents**: PDF, DOCX, PPTX, XLSX, RTF, ODT, EPUB -- **Images**: PNG, JPEG, TIFF, BMP (with OCR) -- **Email**: EML, MSG files - -**Features:** -- **Progress Notifications**: Long-running OCR operations (up to 120s) send progress updates every 10 seconds to prevent client timeouts -- **Pluggable Architecture**: Multiple processor backends (Unstructured.io, Tesseract, custom HTTP APIs) -- **Automatic Detection**: Files are processed based on MIME type -- **Graceful Fallback**: Returns base64-encoded content if processing fails - -**Configuration:** -```dotenv -# Enable document 
processing (optional) -ENABLE_DOCUMENT_PROCESSING=true - -# Unstructured.io processor (cloud/API-based, supports many formats) -ENABLE_UNSTRUCTURED=true -UNSTRUCTURED_API_URL=http://localhost:8002 -UNSTRUCTURED_STRATEGY=auto # auto, fast, or hi_res -UNSTRUCTURED_LANGUAGES=eng,deu -PROGRESS_INTERVAL=10 # Progress update interval in seconds - -# Tesseract processor (local OCR, images only) -ENABLE_TESSERACT=false -TESSERACT_LANG=eng - -# Custom HTTP processor -ENABLE_CUSTOM_PROCESSOR=false -CUSTOM_PROCESSOR_URL=http://localhost:9000/process -CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg -``` - -**Example Usage:** -``` -AI: "Read the contents of Documents/report.pdf" -→ Uses nc_webdav_read_file tool with automatic OCR processing -→ Returns extracted text with parsing metadata -→ Sends progress updates during long operations -``` - -See [env.sample](env.sample) for complete configuration options. - -**Example Tools:** -- `nc_notes_create_note` - Create a new note -- `nc_cookbook_import_recipe` - Import recipes from URLs with schema.org metadata -- `deck_create_card` - Create a Deck card -- `nc_calendar_create_event` - Create a calendar event -- `nc_calendar_create_todo` - Create a CalDAV task/todo -- `nc_contacts_create_contact` - Create a contact -- `nc_webdav_upload_file` - Upload a file to Nextcloud -- And 80+ more... - -> [!TIP] -> **OAuth Scope Filtering**: When connecting via OAuth, MCP clients will only see tools for which you've granted access. For example, granting only `notes:read` and `notes:write` will show 7 Notes tools instead of all 90+ tools. See [OAuth Scopes Documentation](docs/oauth-architecture.md#oauth-scopes) for the complete scope reference, or [OAuth Troubleshooting - Limited Scopes](docs/oauth-troubleshooting.md#limited-scopes---only-seeing-notes-tools) if you're only seeing a subset of tools. -> -> **Known Issue**: Claude Code and some other MCP clients may only request/grant Notes scopes during initial connection. 
Track progress at [#234](https://github.com/cbcoutinho/nextcloud-mcp-server/issues/234). - -### Resources -Resources provide read-only access to Nextcloud data: -- `nc://capabilities` - Server capabilities -- `cookbook://version` - Cookbook app version info -- `nc://Deck/boards/{board_id}` - Deck board data -- `notes://settings` - Notes app settings -- And more... - -Run `uv run nextcloud-mcp-server --help` to see all available options. +- **[Comparison with Context Agent](docs/comparison-context-agent.md)** - When to use each approach ## Examples @@ -291,45 +139,31 @@ AI: "Create a note called 'Meeting Notes' with today's agenda" → Uses nc_notes_create_note tool ``` -### Manage Recipes +### Import Recipes ``` -AI: "Import the recipe from this URL: https://www.example.com/recipe/chocolate-cake" -→ Uses nc_cookbook_import_recipe tool to extract schema.org metadata +AI: "Import the recipe from https://www.example.com/recipe/chocolate-cake" +→ Uses nc_cookbook_import_recipe tool with schema.org metadata extraction ``` -### Manage Calendar +### Schedule Meetings ``` AI: "Schedule a team meeting for next Tuesday at 2pm" → Uses nc_calendar_create_event tool ``` -### Organize Files +### Manage Files ``` AI: "Create a folder called 'Project X' and move all PDFs there" -→ Uses WebDAV tools (nc_webdav_create_directory, nc_webdav_move) +→ Uses nc_webdav_create_directory and nc_webdav_move tools ``` -### Project Management +### Semantic Search (Experimental, Opt-in) ``` -AI: "Create a new Deck board for Q1 planning with Todo, In Progress, and Done stacks" -→ Uses deck_create_board and deck_create_stack tools +AI: "Find notes related to machine learning concepts" +→ Uses nc_semantic_search to find semantically similar notes (requires Qdrant + Ollama setup) ``` -## Transport Protocols - -The server supports multiple MCP transport protocols: - -- **streamable-http** (recommended) - Modern streaming protocol -- **sse** (default, deprecated) - Server-Sent Events for backward 
compatibility -- **http** - Standard HTTP protocol - -```bash -# Use streamable-http (recommended) -uv run nextcloud-mcp-server --transport streamable-http -``` - -> [!WARNING] -> SSE transport is deprecated and will be removed in a future MCP specification version. Please migrate to `streamable-http`. +**Note:** For AI-generated answers with citations, use `nc_semantic_search_answer` (requires MCP client with sampling support). ## Contributing @@ -337,17 +171,17 @@ Contributions are welcome! - Report bugs or request features: [GitHub Issues](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) - Submit improvements: [Pull Requests](https://github.com/cbcoutinho/nextcloud-mcp-server/pulls) -- Read [CLAUDE.md](CLAUDE.md) for development guidelines +- Development guidelines: [CLAUDE.md](CLAUDE.md) ## Security [![MseeP.ai Security Assessment](https://mseep.net/pr/cbcoutinho-nextcloud-mcp-server-badge.png)](https://mseep.ai/app/cbcoutinho-nextcloud-mcp-server) This project takes security seriously: -- OAuth2/OIDC support (experimental - requires upstream patches) -- Basic Auth with app-specific passwords (recommended) -- No credential storage with OAuth mode +- Production-ready Basic Auth with app-specific passwords +- OAuth2/OIDC support (experimental, requires upstream patches) - Per-user access tokens +- No credential storage in OAuth mode - Regular security assessments Found a security issue? Please report it privately to the maintainers. diff --git a/docker-compose.yml b/docker-compose.yml index a3199a1..7223233 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -88,20 +88,27 @@ services: - VECTOR_SYNC_SCAN_INTERVAL=10 - VECTOR_SYNC_PROCESSOR_WORKERS=1 - - LOG_FORMAT=json + - LOG_FORMAT=text # Qdrant configuration (three modes): # 1. Network mode: Set QDRANT_URL=http://qdrant:6333 (requires qdrant service) # 2. In-memory mode: Set QDRANT_LOCATION=:memory: (default if nothing set) # 3. 
Persistent local: Set QDRANT_LOCATION=/app/data/qdrant (stored in mcp-data volume) - - QDRANT_LOCATION=/app/data/qdrant - # - QDRANT_URL=http://qdrant:6333 # Uncomment for network mode - # - QDRANT_API_KEY=${QDRANT_API_KEY:-my_secret_api_key} # Only for network mode + #- QDRANT_LOCATION=/app/data/qdrant + - QDRANT_URL=http://qdrant:6333 # Network mode (requires qdrant service) + - QDRANT_API_KEY=${QDRANT_API_KEY:-my_secret_api_key} # Only for network mode + + # Collection naming: Auto-generated as {deployment-id}-{model-name} + # - Deployment ID: OTEL_SERVICE_NAME (if set) or hostname (fallback) + # - Model name: OLLAMA_EMBEDDING_MODEL + # - Example: "nextcloud-mcp-server-nomic-embed-text" + # - Changing models creates new collection (requires re-embedding) + # - Set QDRANT_COLLECTION to override auto-generation: - QDRANT_COLLECTION=nextcloud_content # Ollama configuration (optional - uses SimpleEmbeddingProvider if not set) - # - OLLAMA_BASE_URL=http://your-ollama-endpoint:port - # - OLLAMA_EMBEDDING_MODEL=nomic-embed-text + - OLLAMA_BASE_URL=https://ollama.internal.coutinho.io:443 + - OLLAMA_EMBEDDING_MODEL=nomic-embed-text # Changing this creates new collection # - OLLAMA_VERIFY_SSL=false mcp-oauth: @@ -207,7 +214,7 @@ services: - keycloak-oauth-storage:/app/.oauth qdrant: - image: qdrant/qdrant:latest + image: qdrant/qdrant:v1.15.5 restart: always ports: - 127.0.0.1:6333:6333 # REST API diff --git a/docs/configuration.md b/docs/configuration.md index 8ae452f..f7a6d6a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -178,6 +178,111 @@ VECTOR_SYNC_ENABLED=true - Requires separate Qdrant service - More complex deployment +### Qdrant Collection Naming + +Collection names are automatically generated to include the embedding model, ensuring safe model switching and preventing dimension mismatches. 
+ +#### Auto-Generated Naming (Default) + +**Format:** `{deployment-id}-{model-name}` + +**Components:** +- **Deployment ID:** `OTEL_SERVICE_NAME` (if configured) or `hostname` (fallback) +- **Model name:** `OLLAMA_EMBEDDING_MODEL` + +**Examples:** + +```bash +# With OTEL service name configured +OTEL_SERVICE_NAME=my-mcp-server +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +# → Collection: "my-mcp-server-nomic-embed-text" + +# Simple Docker deployment (OTEL not configured) +# hostname=mcp-container +OLLAMA_EMBEDDING_MODEL=all-minilm +# → Collection: "mcp-container-all-minilm" +``` + +#### Switching Embedding Models + +When you change `OLLAMA_EMBEDDING_MODEL`, a new collection is automatically created: + +```bash +# Initial setup +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +# Collection: "my-server-nomic-embed-text" (768 dimensions) + +# Change model +OLLAMA_EMBEDDING_MODEL=all-minilm +# Collection: "my-server-all-minilm" (384 dimensions) +# → New collection created, full re-embedding occurs +``` + +**Important:** +- **Collections are mutually exclusive** - vectors cannot be shared between different embedding models +- **Switching models requires re-embedding** all documents (may take time for large note collections) +- **Old collection remains** in Qdrant and can be deleted manually if no longer needed + +#### Explicit Override + +Set `QDRANT_COLLECTION` to use a specific collection name: + +```bash +QDRANT_COLLECTION=my-custom-collection # Bypasses auto-generation +``` + +**Use cases:** +- Backward compatibility with existing deployments +- Custom naming schemes +- Sharing a collection across deployments (advanced) + +#### Multi-Server Deployments + +Each server should have a unique deployment ID to avoid collection collisions: + +```bash +# Server 1 (Production) +OTEL_SERVICE_NAME=mcp-prod +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +# → Collection: "mcp-prod-nomic-embed-text" + +# Server 2 (Staging) +OTEL_SERVICE_NAME=mcp-staging +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +# 
→ Collection: "mcp-staging-nomic-embed-text" + +# Server 3 (Different model) +OTEL_SERVICE_NAME=mcp-experimental +OLLAMA_EMBEDDING_MODEL=bge-large +# → Collection: "mcp-experimental-bge-large" +``` + +**Benefits:** +- Multiple MCP servers can share one Qdrant instance safely +- No naming collisions between deployments +- Clear collection ownership (can see which deployment and model) + +#### Dimension Validation + +The server validates collection dimensions on startup: + +``` +Dimension mismatch for collection 'my-server-nomic-embed-text': + Expected: 384 (from embedding model 'all-minilm') + Found: 768 +This usually means you changed the embedding model. +Solutions: + 1. Delete the old collection: Collection will be recreated with new dimensions + 2. Set QDRANT_COLLECTION to use a different collection name + 3. Revert OLLAMA_EMBEDDING_MODEL to the original model +``` + +**What this prevents:** +- Runtime errors from dimension mismatches +- Data corruption in Qdrant +- Confusing error messages during indexing + ### Vector Sync Configuration Control background indexing behavior: diff --git a/docs/semantic-search-architecture.md b/docs/semantic-search-architecture.md new file mode 100644 index 0000000..8738cd8 --- /dev/null +++ b/docs/semantic-search-architecture.md @@ -0,0 +1,874 @@ +# Semantic Search Architecture + +This document explains the architecture of the semantic search feature in the Nextcloud MCP Server, including background synchronization, vector search, and optional AI-generated answers via MCP sampling. + +> [!IMPORTANT] +> **Status: Experimental** +> - Disabled by default (`VECTOR_SYNC_ENABLED=false`) +> - Currently supports **Notes app only** (multi-app architecture ready, additional apps planned) +> - Requires additional infrastructure (Qdrant vector database + Ollama embedding service) +> - RAG answer generation requires MCP client sampling support + +## Overview + +### What is Semantic Search? 
+ +**Semantic search** finds information based on **meaning** rather than exact keyword matches. It uses vector embeddings to understand that "car" and "automobile" are similar, or that "bread recipe" matches "how to bake bread." + +**Traditional keyword search:** +``` +Query: "machine learning" +Matches: Only notes containing "machine learning" exactly +Misses: Notes with "neural networks", "AI models", "deep learning" +``` + +**Semantic search:** +``` +Query: "machine learning" +Matches: Notes about machine learning, neural networks, AI, deep learning, etc. +Understanding: Semantic similarity via vector embeddings +``` + +### Why It Matters + +Semantic search enables: +- **Natural language queries** - Ask questions in plain language +- **Conceptual discovery** - Find related content even with different terminology +- **Cross-reference insights** - Connect ideas across your knowledge base +- **AI-powered answers** - Generate summaries with citations (optional, requires MCP sampling) + +### Current Support + +- **Supported Apps**: Notes (fully implemented) +- **Planned Apps**: Calendar events, Calendar tasks, Deck cards, Files (with text extraction), Contacts +- **Architecture**: Multi-app plugin system ready, awaiting implementation + +## System Components + +```mermaid +graph TB + subgraph "MCP Client" + Client[Claude Desktop, IDEs, etc.] + end + + subgraph "Nextcloud MCP Server" + MCP[MCP Server] + Scanner[Background Scanner
Hourly Change Detection] + Queue[Document Queue] + Processor[Embedding Processors
Concurrent Workers] + end + + subgraph "Infrastructure" + Qdrant[(Qdrant
Vector Database)] + Ollama[Ollama
Embedding Service] + NC[Nextcloud
Notes API, CalDAV, etc.] + end + + Client <-->|MCP Protocol| MCP + Scanner -->|Fetch Changes| NC + Scanner -->|Enqueue Documents| Queue + Queue -->|Process Batch| Processor + Processor -->|Generate Embeddings| Ollama + Processor -->|Store Vectors| Qdrant + MCP -->|Search Queries| Qdrant + MCP -->|Verify Access| NC +``` + +**Component Roles:** + +- **MCP Server**: Exposes semantic search tools (`nc_semantic_search`, `nc_semantic_search_answer`, `nc_get_vector_sync_status`) +- **Background Scanner**: Discovers changed documents every hour using ETag-based change detection +- **Document Queue**: Holds pending documents for embedding generation +- **Embedding Processors**: Generate vector embeddings via Ollama (concurrent workers) +- **Qdrant Vector Database**: Stores document vectors with metadata and user_id filtering +- **Ollama Embedding Service**: Converts text to 768-dimensional vectors (default: `nomic-embed-text` model) +- **Nextcloud APIs**: Source of truth for documents and access control verification + +## How It Works: Background Synchronization + +Background synchronization runs automatically when `VECTOR_SYNC_ENABLED=true`, discovering changes and indexing documents without user intervention. + +```mermaid +sequenceDiagram + participant Timer + participant Scanner + participant NC as Nextcloud API + participant Queue + participant Processor + participant Ollama + participant Qdrant + + Timer->>Scanner: Trigger (hourly) + Scanner->>NC: Fetch all notes
(Notes API) + NC-->>Scanner: Notes with ETags + Scanner->>Qdrant: Check indexed documents + Qdrant-->>Scanner: Existing ETags + Scanner->>Scanner: Identify changes
(new/modified/deleted) + Scanner->>Queue: Enqueue changed docs + + loop Continuous Processing + Processor->>Queue: Fetch batch + Queue-->>Processor: Documents + Processor->>Ollama: Generate embeddings + Ollama-->>Processor: 768-dim vectors + Processor->>Qdrant: Upsert vectors
(with user_id, doc_type) + end +``` + +### Scanner Behavior + +**Hourly Trigger:** +- Runs every hour (configurable) +- Fetches all notes from Nextcloud Notes API +- Compares ETags with Qdrant's indexed state +- Enqueues new/modified documents + +**Change Detection:** +- **New documents**: No entry in Qdrant → enqueue for indexing +- **Modified documents**: ETag mismatch → enqueue for re-indexing +- **Deleted documents**: In Qdrant but not in Nextcloud → delete from Qdrant + +**Multi-App Plugin Architecture:** +```python +# Each app implements DocumentScanner interface +class NotesScanner(DocumentScanner): + async def scan(self) -> list[Document]: + # Fetch notes, detect changes, return documents +``` + +Currently only `NotesScanner` is implemented. Future: `CalendarScanner`, `DeckScanner`, `FilesScanner`, etc. + +### Queue Processing + +**Document Queue:** +- In-memory FIFO queue (not persistent across restarts) +- Holds documents pending embedding generation +- Batch processing for efficiency + +**Processor Pool:** +- Concurrent workers using `anyio.TaskGroup` +- Process documents in parallel (default: 4 workers) +- Each worker: fetch document → generate embedding → store in Qdrant + +**Backpressure Handling:** +- Queue size limits prevent memory exhaustion +- Slow consumers (Ollama) naturally pace the system + +### Vector Storage + +**Qdrant Collection Schema:** +``` +{ + "id": "note_123", + "vector": [768 dimensions], + "payload": { + "user_id": "alice", + "doc_type": "note", + "doc_id": "123", + "title": "Machine Learning Notes", + "content": "Neural networks are...", + "etag": "abc123", + "last_modified": "2025-01-15T10:30:00Z" + } +} +``` + +**Key Fields:** +- `user_id`: Multi-tenancy filtering (each user's vectors isolated) +- `doc_type`: App identifier ("note", "event", "card", etc.) 
+- `etag`: Change detection for incremental updates + +### Collection Naming and Model Switching + +**Auto-generated collection names:** +- **Format:** `{deployment-id}-{model-name}` +- **Deployment ID:** `OTEL_SERVICE_NAME` (if configured) or `hostname` (fallback) +- **Model name:** `OLLAMA_EMBEDDING_MODEL` +- **Example:** `"my-mcp-server-nomic-embed-text"`, `"mcp-container-all-minilm"` + +**Why model-based naming:** +- Ensures each embedding model gets its own collection +- Prevents dimension mismatches when switching models +- Enables safe model experimentation (new model = new collection) +- Supports multi-server deployments (different deployment IDs) + +**Switching embedding models:** + +Collections are **mutually exclusive** - vectors from one embedding model cannot be used with another. When you change the embedding model: + +1. **New collection is created** with the new model's dimensions +2. **Full re-embedding occurs** - scanner processes all documents again +3. **Old collection remains** - can be deleted manually if no longer needed +4. 
**Dimension validation** - server fails fast if collection dimension doesn't match model + +**Example workflow:** +```bash +# Start with nomic-embed-text (768 dimensions) +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +# Collection: "my-server-nomic-embed-text" +# → Scanner indexes 1000 notes → 1000 vectors in collection + +# Switch to all-minilm (384 dimensions) +OLLAMA_EMBEDDING_MODEL=all-minilm +# Collection: "my-server-all-minilm" +# → Scanner detects 0 indexed documents → re-embeds 1000 notes +# → Old collection "my-server-nomic-embed-text" still exists in Qdrant +``` + +**Re-embedding performance:** +- CPU-only: 1-5 notes/second +- With GPU: 50-200 notes/second +- 1000 notes: 3-16 minutes (CPU) or 5-20 seconds (GPU) + +**Multi-server deployments:** + +Multiple MCP servers can share one Qdrant instance safely: + +```bash +# Server 1 (Production) +OTEL_SERVICE_NAME=mcp-prod +OLLAMA_EMBEDDING_MODEL=nomic-embed-text +# → Collection: "mcp-prod-nomic-embed-text" + +# Server 2 (Staging with different model) +OTEL_SERVICE_NAME=mcp-staging +OLLAMA_EMBEDDING_MODEL=all-minilm +# → Collection: "mcp-staging-all-minilm" +``` + +Each deployment gets its own collection - no naming collisions or dimension conflicts. + +## How It Works: Semantic Search + +Semantic search converts user queries into vectors and finds similar documents using cosine similarity. + +```mermaid +sequenceDiagram + participant User + participant MCP as MCP Server + participant Ollama + participant Qdrant + participant NC as Nextcloud API + + User->>MCP: nc_semantic_search("machine learning") + MCP->>MCP: Check OAuth scope
(semantic:read) + MCP->>Ollama: Generate query embedding + Ollama-->>MCP: Query vector (768-dim) + MCP->>Qdrant: Search similar vectors
(filter: user_id=alice) + Qdrant-->>MCP: Top K results
(with similarity scores) + + loop For each result + MCP->>NC: Verify access
(fetch note by ID) + alt Access granted + NC-->>MCP: Note metadata + else Access denied (403/404) + MCP->>MCP: Filter out result + end + end + + MCP-->>User: Search results
(with scores, excerpts) +``` + +### Dual-Phase Authorization + +**Phase 1: OAuth Scope Check** +- Verify user has `semantic:read` scope +- Reject unauthorized users immediately + +**Phase 2: Per-Document Verification** +- For each search result, fetch document via app API (Notes, Calendar, etc.) +- If fetch succeeds (200 OK), user has access +- If fetch fails (403 Forbidden or 404 Not Found), filter out result +- **Security**: Prevents information leakage from vector search alone + +**Rationale:** +- Vector database doesn't know about sharing, permission changes, or deleted documents +- App APIs are source of truth for access control +- Verification ensures users only see documents they can access + +### Search Flow + +1. **Query Embedding**: Convert user query to 768-dimensional vector via Ollama +2. **Vector Search**: Find top K similar vectors in Qdrant (cosine similarity) +3. **User Filtering**: Qdrant pre-filters by `user_id` (multi-tenancy) +4. **Access Verification**: Fetch each document via app API to verify current access +5. **Result Ranking**: Return results sorted by similarity score +6. **Response**: Include document excerpts, metadata, and similarity scores + +### Performance + +- **Query latency**: 100-300ms typical (embedding + vector search + verification) +- **Accuracy**: Depends on embedding model quality (`nomic-embed-text` recommended) +- **Scalability**: Qdrant handles millions of vectors efficiently + +## How It Works: RAG with MCP Sampling (Optional) + +The `nc_semantic_search_answer` tool generates AI-powered answers with citations using **MCP sampling** - requesting the MCP client's LLM to generate text. + +```mermaid +sequenceDiagram + participant User + participant MCP as MCP Server + participant Client as MCP Client
(Claude Desktop) + participant LLM as Client's LLM
(Claude, GPT, etc.) + + User->>MCP: nc_semantic_search_answer("What are my Q1 goals?") + MCP->>MCP: Semantic search
(find relevant notes) + MCP->>MCP: Construct prompt
(query + documents + instructions) + MCP->>Client: Sampling request
(MCP Protocol) + Client->>User: Prompt for approval
(optional, client-controlled) + User-->>Client: Approve + Client->>LLM: Generate answer
(with context) + LLM-->>Client: Answer with citations + Client-->>MCP: Sampling response + MCP-->>User: Generated answer
(with source documents) +``` + +### MCP Sampling Architecture + +**Why MCP Sampling?** +- **No server-side LLM**: MCP server has no API keys, doesn't call LLMs directly +- **Client controls everything**: Which model, who pays, user approval prompts +- **Privacy**: Documents stay with the client's LLM provider, not a third-party +- **Flexibility**: Works with any MCP client that supports sampling (Claude Desktop, future clients) + +**Prompt Construction:** +``` +User Query: {query} + +Relevant Documents: +1. Document: {title} (Note) + Content: {excerpt} + +2. Document: {title} (Note) + Content: {excerpt} + +Instructions: +- Provide a comprehensive answer to the user's query +- Use the documents above as context +- Include citations: "According to Document 1 (title)..." +- If documents don't contain enough information, say so +``` + +**Graceful Fallback:** +```python +try: + result = await ctx.session.create_message(...) + return answer_with_citations +except Exception as e: + # Fallback: Return documents without generated answer + return SearchResponse( + generated_answer=f"[Sampling unavailable: {e}]", + sources=search_results + ) +``` + +**Client Support:** +- **Requires**: MCP client with sampling capability +- **Known support**: Claude Desktop (as of Claude 3.5+) +- **Graceful degradation**: Returns raw documents if sampling unavailable + +## Authentication & Security + +### OAuth Scopes + +**`semantic:read`** - Search permission +- Allows using `nc_semantic_search` and `nc_semantic_search_answer` tools +- Does NOT grant access to documents (verified via app APIs) +- Required for any semantic search operation + +**`semantic:write`** - Sync control permission +- Allows enabling/disabling background sync (`provision_vector_sync`, `deprovision_vector_sync`) +- Controls whether user's documents are indexed +- Currently not implemented in OAuth mode (BasicAuth only) + +### Dual-Phase Authorization Pattern + +**Phase 1: Scope Check** (semantic:read) +- Verifies user 
is authorized to search +- Prevents unauthorized vector database access + +**Phase 2: Document Verification** (app-specific APIs) +- For each search result, fetch via Notes API, CalDAV, etc. +- If user can fetch → include in results +- If user cannot fetch (403/404) → filter out +- **Security**: Vector search cannot leak documents user shouldn't see + +**Example Scenario:** +1. Alice creates note "Secret Project X" +2. Background sync indexes note with `user_id=alice` +3. Bob searches for "project" +4. By vector similarity alone, "Secret Project X" would match Bob's query +5. Qdrant restricts the search to `user_id=bob` → Alice's note is never returned +6. Even if Bob somehow got the doc_id, Phase 2 verification would fail (404 Not Found) + +### Offline Access for Background Sync + +**Why needed:** +- Background scanner runs hourly without user interaction +- Requires valid access tokens to fetch documents from Nextcloud APIs +- User's session token expires after hours/days + +**OAuth Mode (ADR-004 Flow 2):** +- User explicitly provisions offline access via `provision_nextcloud_access` tool +- Server requests `offline_access` scope → receives refresh token +- Refresh token stored securely (database, encrypted) +- Background sync uses refresh tokens to obtain access tokens + +**BasicAuth Mode:** +- Username/password stored in environment variables +- Always available for background operations +- Simpler but less secure (credentials never expire) + +## Deployment Modes + +### Authentication Modes + +| Mode | Security | Offline Access | Background Sync | Best For | +|------|----------|----------------|-----------------|----------| +| **BasicAuth** | Lower (credentials in env) | Always available | ✅ Works immediately | Single-user, development, testing | +| **OAuth** | Higher (tokens, scopes) | User must provision | ⚠️ Not yet implemented | Multi-user, production | + +**BasicAuth:** +- Set `NEXTCLOUD_USERNAME` and `NEXTCLOUD_PASSWORD` +- Background sync works immediately when
`VECTOR_SYNC_ENABLED=true` +- Credentials stored in `.env` file (secure server access required) + +**OAuth:** +- Client authenticates with `semantic:read` scope +- User must explicitly provision offline access (future: `provision_vector_sync` tool) +- Background sync only works for users who provisioned access +- More secure: tokens expire, user controls access + +### Qdrant Deployment Modes + +| Mode | Configuration | Persistence | Scalability | Best For | +|------|---------------|-------------|-------------|----------| +| **In-Memory** (default) | `QDRANT_LOCATION=:memory:` | ❌ Lost on restart | Single instance | Testing, development | +| **Persistent Local** | `QDRANT_LOCATION=/data/qdrant` | ✅ Survives restarts | Single instance | Small deployments | +| **Network** | `QDRANT_URL=http://qdrant:6333` | ✅ Dedicated service | ✅ Horizontal scaling | Production | + +**In-Memory Mode:** +```bash +VECTOR_SYNC_ENABLED=true +# QDRANT_LOCATION not set → defaults to :memory: +``` +- Fastest startup +- No disk I/O +- **Warning**: All vectors lost when server restarts (must re-index) + +**Persistent Local Mode:** +```bash +VECTOR_SYNC_ENABLED=true +QDRANT_LOCATION=/var/lib/qdrant +``` +- Vectors survive restarts +- Single server only (no distributed setup) +- Disk I/O for durability + +**Network Mode (Recommended for Production):** +```bash +VECTOR_SYNC_ENABLED=true +QDRANT_URL=http://qdrant:6333 +QDRANT_API_KEY=secret # optional +``` +- Dedicated Qdrant service (Docker, Kubernetes) +- Horizontal scaling (multiple MCP servers → one Qdrant) +- High availability options + +### Embedding Service Options + +| Service | Configuration | Cost | Performance | Best For | +|---------|---------------|------|-------------|----------| +| **Ollama** (recommended) | `OLLAMA_BASE_URL=http://ollama:11434` | Free (self-hosted) | Fast (local GPU) | Production, development | +| **OpenAI** (future) | `OPENAI_API_KEY=sk-...` | Paid (API) | Fast (cloud) | Cloud deployments | +| **Fallback** | No 
config | Free | Slow (random) | Testing only (not production) | + +**Ollama Setup (Recommended):** +```bash +# docker-compose.yml +services: + ollama: + image: ollama/ollama + volumes: + - ollama-data:/root/.ollama + ports: + - "11434:11434" + +# Pull embedding model +docker compose exec ollama ollama pull nomic-embed-text +``` + +**Environment Configuration:** +```bash +OLLAMA_BASE_URL=http://ollama:11434 +OLLAMA_EMBEDDING_MODEL=nomic-embed-text # 768-dimensional vectors +``` + +**Model Options:** +- `nomic-embed-text` (default): 768-dim, optimized for semantic search +- `all-minilm`: Smaller, faster, slightly less accurate +- `mxbai-embed-large`: Larger, more accurate, slower + +## Configuration Overview + +### Key Environment Variables + +**Enable Semantic Search:** +```bash +VECTOR_SYNC_ENABLED=true # Default: false (opt-in) +``` + +**Qdrant Vector Database:** +```bash +# In-memory mode (default if VECTOR_SYNC_ENABLED=true) +# QDRANT_LOCATION not set → uses :memory: + +# Persistent local mode +QDRANT_LOCATION=/var/lib/qdrant + +# Network mode (production) +QDRANT_URL=http://qdrant:6333 +QDRANT_API_KEY=secret # optional +``` + +**Ollama Embedding Service:** +```bash +OLLAMA_BASE_URL=http://ollama:11434 +OLLAMA_EMBEDDING_MODEL=nomic-embed-text # Default +``` + +**Scanner Configuration:** +```bash +VECTOR_SYNC_INTERVAL=3600 # Scan interval in seconds (default: 1 hour) +``` + +### Resource Requirements + +**Qdrant:** +- **Memory**: ~100-200 MB base + ~1 KB per vector (1M vectors ≈ 1 GB) +- **Disk**: Persistent mode only, ~200 bytes per vector +- **CPU**: Low (indexing) to moderate (search) + +**Ollama:** +- **Memory**: 2-4 GB for `nomic-embed-text` model +- **CPU**: High during embedding generation, idle otherwise +- **GPU**: Optional but recommended (10-100x faster) + +**MCP Server:** +- **Memory**: +50-100 MB for background sync workers +- **CPU**: Moderate during scanning/processing, low otherwise + +### Trade-offs + +| Consideration | In-Memory Qdrant | 
Persistent Qdrant | Network Qdrant | +|---------------|------------------|-------------------|----------------| +| Setup complexity | ✅ Minimal | ✅ Easy | ⚠️ Requires separate service | +| Durability | ❌ Lost on restart | ✅ Survives restarts | ✅ Survives restarts | +| Scalability | ❌ Single instance | ❌ Single instance | ✅ Horizontal scaling | +| Performance | ✅ Fastest | ✅ Fast | ⚠️ Network latency | + +## Operational Behavior + +### What Happens When VECTOR_SYNC_ENABLED=true + +**Immediate (Server Startup):** +1. MCP server connects to Qdrant (creates collection if needed) +2. MCP server connects to Ollama (verifies embedding model available) +3. Background scanner starts (schedules hourly runs) +4. Document queue and processors initialize + +**First Scan (Within 1 hour):** +1. Scanner fetches all notes from Nextcloud +2. Compares with Qdrant (likely empty on first run) +3. Enqueues all notes for indexing +4. Processors generate embeddings (may take minutes for large note collections) +5. Vectors stored in Qdrant with user_id filtering + +**Hourly Thereafter:** +1. Scanner fetches all notes +2. Identifies new/modified/deleted notes (ETag comparison) +3. Enqueues changes only +4. 
Incremental updates processed + +### Performance Expectations + +**Embedding Generation:** +- **Without GPU**: 1-5 notes/second (CPU-bound) +- **With GPU**: 50-200 notes/second (highly parallel) +- **Initial indexing**: 100 notes ≈ 20-100 seconds (CPU), 1-2 seconds (GPU) + +**Search Query:** +- **Embedding generation**: 50-100ms +- **Vector search**: 10-50ms (depends on collection size) +- **Access verification**: 20-100ms per document (Nextcloud API calls) +- **Total latency**: 100-300ms typical + +**Resource Usage:** +- **Idle**: Minimal (background scanner sleeps) +- **Scanning**: Moderate CPU (ETag checks, API calls) +- **Processing**: High CPU/GPU (embedding generation) +- **Searching**: Low to moderate (depends on query frequency) + +### Background Sync Behavior + +**Scanner Triggers:** +- Hourly (configurable via `VECTOR_SYNC_INTERVAL`) +- Manual trigger via `nc_trigger_vector_sync` (future) + +**Queue Processing:** +- Continuous (workers always running) +- Batch processing (fetch 10 documents at a time) +- Concurrent workers (4 by default) + +**Error Handling:** +- Individual document failures logged but don't stop scanning +- Retries for transient errors (network timeouts, rate limits) +- Failed documents skipped, re-attempted on next scan + +**What Gets Indexed:** +- **Notes**: All notes accessible to the authenticated user +- **Future**: Calendar events, tasks, deck cards, files with text extraction, contacts + +## Monitoring & Observability + +### MCP Tools + +**`nc_get_vector_sync_status`** - Check sync status +```python +{ + "total_documents": 1234, + "indexed_documents": 1200, + "pending_documents": 34, + "sync_enabled": true, + "last_scan": "2025-01-15T14:30:00Z", + "status": "syncing" # idle | syncing | error +} +``` + +**Interpreting Status:** +- `idle`: No pending work, last scan completed successfully +- `syncing`: Currently processing documents +- `error`: Last scan failed (check logs) + +### Logs to Check + +**Scanner Logs:** +``` +[INFO] 
Vector sync scanner started (interval: 3600s) +[INFO] Scanning notes: found 150 documents +[INFO] Changes detected: 5 new, 2 modified, 1 deleted +[INFO] Enqueued 7 documents for processing +``` + +**Processor Logs:** +``` +[INFO] Processing document: note_123 +[DEBUG] Generated embedding (768 dimensions) +[INFO] Stored vector in Qdrant: note_123 +``` + +**Error Logs:** +``` +[ERROR] Failed to generate embedding for note_123: Connection timeout +[WARN] Qdrant connection lost, retrying... +[ERROR] Ollama embedding failed: Model not found +``` + +**Log Locations:** +- **Docker**: `docker compose logs mcp` +- **Local**: stdout (redirect to file if needed) +- **Kubernetes**: `kubectl logs -f deployment/nextcloud-mcp-server` + +### Metrics to Monitor + +**Indexing Progress:** +- Total documents vs indexed documents +- Pending queue size +- Processing rate (docs/second) + +**Search Performance:** +- Query latency (p50, p95, p99) +- Results per query +- Verification overhead (API calls per query) + +**Resource Usage:** +- Qdrant memory/disk usage +- Ollama CPU/GPU usage +- MCP server memory + +For detailed observability setup, see [docs/observability.md](observability.md). + +## Troubleshooting from Architecture Perspective + +### Documents Not Appearing in Search + +**Diagnosis Flow:** +1. Check sync status: `nc_get_vector_sync_status` + - `sync_enabled: false` → Enable with `VECTOR_SYNC_ENABLED=true` + - `status: error` → Check scanner logs for failures +2. Check queue size: + - `pending_documents > 0` → Processing in progress, wait + - `pending_documents == 0` but `indexed_documents` low → Scan hasn't run yet (wait up to 1 hour) +3. Check Qdrant: + - Connection errors in logs → Verify `QDRANT_URL` or `QDRANT_LOCATION` + - Collection empty → First scan hasn't completed +4. 
Check Ollama: + - Embedding errors in logs → Verify `OLLAMA_BASE_URL` + - Model not found → Pull model: `ollama pull nomic-embed-text` + +**Common Causes:** +- Sync disabled (default): Enable `VECTOR_SYNC_ENABLED=true` +- Ollama not running: Start Ollama service +- Qdrant not accessible: Check network/URL +- First scan in progress: Wait up to 1 hour + processing time + +### Slow Search Performance + +**Diagnosis:** +1. **Query embedding slow (>500ms)**: + - Ollama overloaded or CPU-bound + - Solution: Use GPU, upgrade CPU, or reduce concurrent requests +2. **Vector search slow (>200ms)**: + - Large collection (millions of vectors) + - Solution: Use network Qdrant with SSDs, add indexing +3. **Verification slow (>500ms)**: + - Many results to verify (10+ documents) + - Nextcloud API slow or overloaded + - Solution: Reduce `limit` parameter, optimize Nextcloud + +**Performance Tuning:** +- Reduce search `limit` (default: 10 results) +- Use network Qdrant for large collections +- Enable Ollama GPU acceleration +- Check Nextcloud API response times + +### Background Sync Stopped + +**Diagnosis:** +1. Check logs for errors: + - Authentication failures (401/403) → Token expired (OAuth) or credentials invalid (BasicAuth) + - Connection timeouts → Network issues with Nextcloud/Qdrant/Ollama + - Rate limiting (429) → Reduce scan frequency +2. Check `nc_get_vector_sync_status`: + - `status: error` → See logs for details + - `last_scan` timestamp old (>2 hours) → Scanner may have crashed +3. Verify services: + - Qdrant accessible: `curl http://qdrant:6333/` + - Ollama accessible: `curl http://ollama:11434/api/tags` + - Nextcloud accessible: Check API health + +**OAuth Mode (Future):** +- Offline access token expired → Re-provision via `provision_vector_sync` +- User deprovisioned access → Sync stops intentionally + +### Out of Memory + +**Diagnosis:** +1. Check Qdrant mode: + - In-memory mode with large collection → Switch to persistent or network mode +2. 
Check embedding batch size: + - Too many documents processed simultaneously → Reduce worker count +3. Check Ollama memory: + - Large models loaded → Use smaller embedding model + +**Solutions:** +- Use persistent or network Qdrant (frees server memory) +- Reduce concurrent processor workers +- Use smaller embedding model (`all-minilm` instead of `nomic-embed-text`) +- Increase server memory allocation + +## Limitations & Future Work + +### Current Limitations + +1. **Notes App Only** + - Architecture supports multiple apps (plugin system ready) + - Only `NotesScanner` and `NotesProcessor` implemented + - Future: Calendar, Deck, Files, Contacts + +2. **MCP Sampling Support** + - `nc_semantic_search_answer` requires client sampling capability + - Not all MCP clients support sampling yet + - Graceful fallback: Returns documents without generated answer + +3. **OAuth Background Sync** + - User-controlled background jobs not yet implemented + - Currently works in BasicAuth mode only + - Future: Users opt-in via `provision_vector_sync` tool + +4. **No Incremental Updates** + - Document changes trigger full re-embedding + - Cannot update just modified paragraphs + - Future: Paragraph-level chunking and incremental updates + +5. **No Query Caching** + - Each search generates new query embedding + - Repeated queries re-search Qdrant + - Future: Cache recent query embeddings and results + +6. 
**Single Embedding Model** + - Uses one model for all documents and queries + - Cannot customize per app or user + - Future: App-specific or user-selected models + +### Future Enhancements + +**Multi-App Support** (In Progress): +- Scanner plugins for Calendar, Deck, Files, Contacts +- Unified vector search across all apps +- App-specific metadata in vector payloads + +**User-Controlled Sync (OAuth Mode)**: +- `provision_vector_sync` and `deprovision_vector_sync` tools +- Per-user background job scheduling +- User dashboard for sync status and controls + +**Advanced Search Features**: +- Hybrid search (vector + keyword combined) +- Filtering by date range, app type, tags +- Aggregations and faceted search +- Search result explanations (why this matched) + +**Performance Optimizations**: +- Query caching for repeated searches +- Incremental document updates (paragraph-level) +- Batch query processing +- Qdrant HNSW indexing tuning + +**Embedding Improvements**: +- Support for OpenAI embeddings (ada-002, text-embedding-3) +- Multi-language embedding models +- Fine-tuned models for Nextcloud content +- Paragraph-level chunking for long documents + +## References + +### Architecture Decision Records (ADRs) + +- **[ADR-003: Vector Database Semantic Search](ADR-003-vector-database-semantic-search.md)** - Qdrant selection rationale, embedding strategy, hybrid search (superseded by ADR-007 but technical decisions remain valid) +- **[ADR-007: Background Vector Sync Job Management](ADR-007-background-vector-sync-job-management.md)** - Current implementation, Scanner-Queue-Processor architecture, plugin system +- **[ADR-008: MCP Sampling for Semantic Search](ADR-008-mcp-sampling-for-semantic-search.md)** - RAG with MCP sampling, client-server separation, prompt construction +- **[ADR-009: Semantic Search OAuth Scope](ADR-009-semantic-search-oauth-scope.md)** - OAuth scope model, dual-phase authorization, security rationale + +### Configuration & Setup + +- **[Configuration 
Guide](configuration.md)** - Environment variables, Qdrant setup, Ollama setup, detailed configuration options +- **[Installation Guide](installation.md)** - Deployment options (Docker, Kubernetes, local) +- **[Running the Server](running.md)** - Starting the server, transport options, testing + +### Monitoring & Troubleshooting + +- **[Observability Guide](observability.md)** - Logging, metrics, tracing, debugging +- **[Troubleshooting](troubleshooting.md)** - General issues and solutions + +### Related Documentation + +- **[OAuth Architecture](oauth-architecture.md)** - OAuth flows, scopes, token management +- **[Comparison with Context Agent](comparison-context-agent.md)** - When to use Nextcloud MCP Server vs Context Agent + +--- + +**Questions or Issues?** +- [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) +- [Contribute improvements](https://github.com/cbcoutinho/nextcloud-mcp-server/pulls) diff --git a/nextcloud_mcp_server/auth/userinfo_routes.py b/nextcloud_mcp_server/auth/userinfo_routes.py index 5a32b2e..09a870c 100644 --- a/nextcloud_mcp_server/auth/userinfo_routes.py +++ b/nextcloud_mcp_server/auth/userinfo_routes.py @@ -43,14 +43,17 @@ async def _get_processing_status(request: Request) -> dict[str, Any] | None: return None try: - # Get document queue from app state - document_queue = getattr(request.app.state, "document_queue", None) - if document_queue is None: - logger.debug("document_queue not available in app state") + # Get document receive stream from app state + document_receive_stream = getattr( + request.app.state, "document_receive_stream", None + ) + if document_receive_stream is None: + logger.debug("document_receive_stream not available in app state") return None - # Get pending count from queue - pending_count = document_queue.qsize() + # Get pending count from stream statistics + stats = document_receive_stream.statistics() + pending_count = stats.current_buffer_used # Get Qdrant client and query indexed count 
indexed_count = 0 @@ -63,7 +66,7 @@ async def _get_processing_status(request: Request) -> dict[str, Any] | None: # Count documents in collection count_result = await qdrant_client.count( - collection_name=settings.qdrant_collection + collection_name=settings.get_collection_name() ) indexed_count = count_result.count diff --git a/nextcloud_mcp_server/config.py b/nextcloud_mcp_server/config.py index fa161f8..603d28a 100644 --- a/nextcloud_mcp_server/config.py +++ b/nextcloud_mcp_server/config.py @@ -209,6 +209,46 @@ class Settings: "API key is only relevant for network mode and will be ignored." ) + def get_collection_name(self) -> str: + """ + Get Qdrant collection name. + + Auto-generates from deployment ID + model name unless explicitly set. + Deployment ID uses OTEL_SERVICE_NAME if configured, otherwise hostname. + + This enables: + - Safe embedding model switching (new model → new collection) + - Multi-server deployments (unique deployment IDs) + - Clear collection naming (shows deployment and model) + + Format: {deployment-id}-{model-name} + + Examples: + - "my-deployment-nomic-embed-text" (OTEL_SERVICE_NAME set) + - "mcp-container-all-minilm" (hostname fallback) + + Returns: + Collection name string + """ + import socket + + # Use explicit override if user configured non-default value + if self.qdrant_collection != "nextcloud_content": + return self.qdrant_collection + + # Determine deployment ID (OTEL service name or hostname fallback) + if self.otel_service_name != "nextcloud-mcp-server": # Non-default + deployment_id = self.otel_service_name + else: + # Fallback to hostname for simple Docker deployments without OTEL config + deployment_id = socket.gethostname() + + # Sanitize deployment ID and model name + deployment_id = deployment_id.lower().replace(" ", "-").replace("_", "-") + model_name = self.ollama_embedding_model.replace("/", "-").replace(":", "-") + + return f"{deployment_id}-{model_name}" + def get_settings() -> Settings: """Get application 
settings from environment variables. diff --git a/nextcloud_mcp_server/server/semantic.py b/nextcloud_mcp_server/server/semantic.py index e20bdd0..135dd90 100644 --- a/nextcloud_mcp_server/server/semantic.py +++ b/nextcloud_mcp_server/server/semantic.py @@ -68,17 +68,25 @@ def configure_semantic_tools(mcp: FastMCP): client = await get_client(ctx) username = client.username + logger.info( + f"Semantic search: query='{query}', user={username}, " + f"limit={limit}, score_threshold={score_threshold}" + ) + try: # Generate embedding for query embedding_service = get_embedding_service() query_embedding = await embedding_service.embed(query) + logger.debug( + f"Generated embedding for query (dimension={len(query_embedding)})" + ) # Search Qdrant with user filtering # Note: Currently only searching notes (doc_type="note") # Future: Remove doc_type filter to search all apps qdrant_client = await get_qdrant_client() search_response = await qdrant_client.query_points( - collection_name=settings.qdrant_collection, + collection_name=settings.get_collection_name(), query=query_embedding, query_filter=Filter( must=[ @@ -98,6 +106,15 @@ def configure_semantic_tools(mcp: FastMCP): with_vectors=False, # Don't return vectors to save bandwidth ) + logger.info( + f"Qdrant returned {len(search_response.points)} results " + f"(before deduplication and access verification)" + ) + if search_response.points: + # Log top 3 scores to help with threshold tuning + top_scores = [p.score for p in search_response.points[:3]] + logger.debug(f"Top 3 similarity scores: {top_scores}") + # Deduplicate by document ID (multiple chunks per document) seen_doc_ids = set() results = [] @@ -137,9 +154,14 @@ def configure_semantic_tools(mcp: FastMCP): except HTTPStatusError as e: if e.response.status_code == 403: # User lost access, skip this document + logger.debug(f"Skipping note {doc_id}: access denied (403)") continue elif e.response.status_code == 404: # Document was deleted but not yet removed from 
vector DB + logger.debug( + f"Skipping note {doc_id}: not found (404), " + f"likely deleted after indexing" + ) continue else: # Log other errors but continue processing @@ -148,6 +170,16 @@ def configure_semantic_tools(mcp: FastMCP): ) continue + logger.info( + f"Returning {len(results)} results after deduplication and access verification" + ) + if results: + result_details = [ + f"note_{r.id} (score={r.score:.3f}, title='{r.title}')" + for r in results[:5] # Show top 5 + ] + logger.debug(f"Top results: {', '.join(result_details)}") + return SemanticSearchResponse( results=results, query=query, @@ -259,7 +291,47 @@ def configure_semantic_tools(mcp: FastMCP): success=True, ) - # 3. Construct context from retrieved documents + # 3. Check if client supports sampling + from mcp.types import ClientCapabilities, SamplingCapability + + client_has_sampling = ctx.session.check_client_capability( + ClientCapabilities(sampling=SamplingCapability()) + ) + + # Log capability check result for debugging + logger.info( + f"Sampling capability check: client_has_sampling={client_has_sampling}, " + f"query='{query}'" + ) + if hasattr(ctx.session, "_client_params") and ctx.session._client_params: + client_caps = ctx.session._client_params.capabilities + logger.debug( + f"Client advertised capabilities: " + f"roots={client_caps.roots is not None}, " + f"sampling={client_caps.sampling is not None}, " + f"experimental={client_caps.experimental is not None}" + ) + + if not client_has_sampling: + logger.info( + f"Client does not support sampling (query: '{query}'), " + f"returning {len(search_response.results)} documents" + ) + return SamplingSearchResponse( + query=query, + generated_answer=( + f"[Sampling not supported by client]\n\n" + f"Your MCP client doesn't support answer generation. " + f"Found {search_response.total_found} relevant documents. " + f"Please review the sources below." 
+ ), + sources=search_response.results, + total_found=search_response.total_found, + search_method="semantic_sampling_unsupported", + success=True, + ) + + # 4. Construct context from retrieved documents context_parts = [] for idx, result in enumerate(search_response.results, 1): context_parts.append( @@ -273,7 +345,7 @@ def configure_semantic_tools(mcp: FastMCP): context = "\n".join(context_parts) - # 4. Construct prompt - reuse user's query, add context and instructions + # 5. Construct prompt - reuse user's query, add context and instructions prompt = ( f"{query}\n\n" f"Here are relevant documents from Nextcloud (notes, calendar events, deck cards, files, contacts):\n\n" @@ -282,31 +354,35 @@ def configure_semantic_tools(mcp: FastMCP): f"Cite the document numbers when referencing specific information." ) - logger.debug( - f"Requesting sampling for query: {query} " - f"({len(search_response.results)} documents retrieved)" + logger.info( + f"Initiating sampling request: query_length={len(query)}, " + f"documents={len(search_response.results)}, " + f"prompt_length={len(prompt)}, max_tokens={max_answer_tokens}" ) - # 5. Request LLM completion via MCP sampling - try: - sampling_result = await ctx.session.create_message( - messages=[ - SamplingMessage( - role="user", - content=TextContent(type="text", text=prompt), - ) - ], - max_tokens=max_answer_tokens, - temperature=0.7, - model_preferences=ModelPreferences( - hints=[ModelHint(name="claude-3-5-sonnet")], - intelligencePriority=0.8, - speedPriority=0.5, - ), - include_context="thisServer", - ) + # 6. Request LLM completion via MCP sampling with timeout + import anyio - # 6. 
Extract answer from sampling response + try: + with anyio.fail_after(30): + sampling_result = await ctx.session.create_message( + messages=[ + SamplingMessage( + role="user", + content=TextContent(type="text", text=prompt), + ) + ], + max_tokens=max_answer_tokens, + temperature=0.7, + model_preferences=ModelPreferences( + hints=[ModelHint(name="claude-3-5-sonnet")], + intelligencePriority=0.8, + speedPriority=0.5, + ), + include_context="thisServer", + ) + + # 7. Extract answer from sampling response if sampling_result.content.type == "text": generated_answer = sampling_result.content.text else: @@ -318,7 +394,8 @@ def configure_semantic_tools(mcp: FastMCP): logger.info( f"Sampling successful: model={sampling_result.model}, " - f"stop_reason={sampling_result.stopReason}" + f"stop_reason={sampling_result.stopReason}, " + f"answer_length={len(generated_answer)}" ) return SamplingSearchResponse( @@ -332,23 +409,78 @@ def configure_semantic_tools(mcp: FastMCP): success=True, ) - except Exception as e: - # Fallback: Return documents without generated answer + except TimeoutError: logger.warning( - f"Sampling failed ({type(e).__name__}: {e}), " + f"Sampling request timed out after 30 seconds for query: '{query}', " f"returning search results only" ) + return SamplingSearchResponse( + query=query, + generated_answer=( + f"[Sampling request timed out]\n\n" + f"The answer generation took too long (>30s). " + f"Found {search_response.total_found} relevant documents. " + f"Please review the sources below or try a simpler query." + ), + sources=search_response.results, + total_found=search_response.total_found, + search_method="semantic_sampling_timeout", + success=True, + ) + + except McpError as e: + # Expected MCP protocol errors (user rejection, unsupported, etc.) 
+ error_msg = str(e) + + if "rejected" in error_msg.lower() or "denied" in error_msg.lower(): + # User explicitly declined - this is normal, not an error + logger.info(f"User declined sampling request for query: '{query}'") + search_method = "semantic_sampling_user_declined" + user_message = "User declined to generate an answer" + elif "not supported" in error_msg.lower(): + # Client doesn't support sampling - also normal + logger.info(f"Sampling not supported by client for query: '{query}'") + search_method = "semantic_sampling_unsupported" + user_message = "Sampling not supported by this client" + else: + # Other MCP protocol errors + logger.warning( + f"MCP error during sampling for query '{query}': {error_msg}" + ) + search_method = "semantic_sampling_mcp_error" + user_message = f"Sampling unavailable: {error_msg}" return SamplingSearchResponse( query=query, generated_answer=( - f"[Sampling unavailable: {str(e)}]\n\n" + f"[{user_message}]\n\n" f"Found {search_response.total_found} relevant documents. " f"Please review the sources below." ), sources=search_response.results, total_found=search_response.total_found, - search_method="semantic_sampling_fallback", + search_method=search_method, + success=True, + ) + + except Exception as e: + # Truly unexpected errors - these SHOULD have tracebacks + logger.error( + f"Unexpected error during sampling for query '{query}': " + f"{type(e).__name__}: {e}", + exc_info=True, + ) + + return SamplingSearchResponse( + query=query, + generated_answer=( + f"[Unexpected error during sampling]\n\n" + f"Found {search_response.total_found} relevant documents. " + f"Please review the sources below." 
+ ), + sources=search_response.results, + total_found=search_response.total_found, + search_method="semantic_sampling_error", success=True, ) @@ -413,7 +545,7 @@ def configure_semantic_tools(mcp: FastMCP): # Count documents in collection count_result = await qdrant_client.count( - collection_name=settings.qdrant_collection + collection_name=settings.get_collection_name() ) indexed_count = count_result.count diff --git a/nextcloud_mcp_server/vector/processor.py b/nextcloud_mcp_server/vector/processor.py index aafeb69..9105070 100644 --- a/nextcloud_mcp_server/vector/processor.py +++ b/nextcloud_mcp_server/vector/processor.py @@ -100,7 +100,7 @@ async def process_document(doc_task: DocumentTask, nc_client: NextcloudClient): # Handle deletion if doc_task.operation == "delete": await qdrant_client.delete( - collection_name=settings.qdrant_collection, + collection_name=settings.get_collection_name(), points_selector=Filter( must=[ FieldCondition( @@ -209,7 +209,7 @@ async def _index_document( # Upsert to Qdrant await qdrant_client.upsert( - collection_name=settings.qdrant_collection, + collection_name=settings.get_collection_name(), points=points, wait=True, ) diff --git a/nextcloud_mcp_server/vector/qdrant_client.py b/nextcloud_mcp_server/vector/qdrant_client.py index 32664c4..16d8157 100644 --- a/nextcloud_mcp_server/vector/qdrant_client.py +++ b/nextcloud_mcp_server/vector/qdrant_client.py @@ -59,30 +59,57 @@ async def get_qdrant_client() -> AsyncQdrantClient: logger.warning("No Qdrant mode configured, defaulting to :memory:") _qdrant_client = AsyncQdrantClient(":memory:") - # Ensure collection exists - collection_name = settings.qdrant_collection + # Get collection name (auto-generated from deployment ID + model) + collection_name = settings.get_collection_name() # Import here to avoid circular dependency from nextcloud_mcp_server.embedding import get_embedding_service embedding_service = get_embedding_service() - dimension = embedding_service.get_dimension() + 
expected_dimension = embedding_service.get_dimension() try: - await _qdrant_client.get_collection(collection_name) - logger.info(f"Using existing Qdrant collection: {collection_name}") - except Exception: + # Get existing collection + collection_info = await _qdrant_client.get_collection(collection_name) + actual_dimension = collection_info.config.params.vectors.size + + # Validate dimension matches + if actual_dimension != expected_dimension: + raise ValueError( + f"Dimension mismatch for collection '{collection_name}':\n" + f" Expected: {expected_dimension} (from embedding model '{settings.ollama_embedding_model}')\n" + f" Found: {actual_dimension}\n" + f"This usually means you changed the embedding model.\n" + f"Solutions:\n" + f" 1. Delete the old collection: Collection will be recreated with new dimensions\n" + f" 2. Set QDRANT_COLLECTION to use a different collection name\n" + f" 3. Revert OLLAMA_EMBEDDING_MODEL to the original model" + ) + + logger.info( + f"Using existing Qdrant collection: {collection_name} " + f"(dimension={actual_dimension}, model={settings.ollama_embedding_model})" + ) + + except Exception as e: + # Check if it's a dimension mismatch error (re-raise it) + if isinstance(e, ValueError): + raise + # Collection doesn't exist, create it await _qdrant_client.create_collection( collection_name=collection_name, vectors_config=VectorParams( - size=dimension, + size=expected_dimension, distance=Distance.COSINE, ), ) logger.info( - f"Created Qdrant collection: {collection_name} " - f"(dimension={dimension}, distance=COSINE)" + f"Created Qdrant collection: {collection_name}\n" + f" Dimension: {expected_dimension}\n" + f" Model: {settings.ollama_embedding_model}\n" + f" Distance: COSINE\n" + f"Background sync will index all documents with this embedding model." 
) return _qdrant_client diff --git a/nextcloud_mcp_server/vector/scanner.py b/nextcloud_mcp_server/vector/scanner.py index 72cba68..c625638 100644 --- a/nextcloud_mcp_server/vector/scanner.py +++ b/nextcloud_mcp_server/vector/scanner.py @@ -96,7 +96,7 @@ async def scan_user_documents( nc_client: Authenticated Nextcloud client initial_sync: If True, send all documents (first-time sync) """ - logger.info(f"Scanning documents for user: {user_id}") + logger.debug(f"Scanning documents for user: {user_id}") # Fetch all notes from Nextcloud notes = [note async for note in nc_client.notes.get_all_notes()] @@ -127,7 +127,7 @@ async def scan_user_documents( # Get indexed state from Qdrant qdrant_client = await get_qdrant_client() scroll_result = await qdrant_client.scroll( - collection_name=get_settings().qdrant_collection, + collection_name=get_settings().get_collection_name(), scroll_filter=Filter( must=[ FieldCondition(key="user_id", match=MatchValue(value=user_id)),