Update README

bump: version 0.47.0 → 0.48.0
Merge pull request #347 from cbcoutinho/feature/openai-provider-support
2025-11-23 00:55:16 +00:00 · 2025-11-23 00:53:24 +00:00 · 2025-11-23 01:52:55 +01:00 · 2025-11-23 01:51:42 +01:00 · 2025-11-23 01:26:22 +01:00 · 2025-11-23 00:23:47 +00:00
207 changed files with 42555 additions and 3128 deletions
@@ -5,3 +5,5 @@
 !uv.lock

 !nextcloud_mcp_server/**/*.py
+!nextcloud_mcp_server/**/*.html
+!nextcloud_mcp_server/auth/static/*
@@ -15,17 +15,17 @@ jobs:
      packages: write
    steps:
      - name: Check out
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
        with:
          fetch-depth: 0
          token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
      - name: Create bump and changelog
-        uses: commitizen-tools/commitizen-action@5b0848cd060263e24602d1eba03710e056ef7711 # 0.24.0
+        uses: commitizen-tools/commitizen-action@bb4f1df6601e2a1a891506581b0c53acdc88e07d # 0.26.0
        with:
          github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
          changelog_increment_filename: body.md
      - name: Release
-        uses: softprops/action-gh-release@6da8fa9354ddfdc4aeace5fc48d7f679b5214090 # v2.4.1
+        uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
        with:
          body_path: "body.md"
          tag_name: v${{ env.REVISION }}
@@ -12,11 +12,11 @@ jobs:
      packages: write
    steps:
      - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # v5
+        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5
        with:
          # list of Docker images to use as base name for tags
          images: |
@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
        with:
          fetch-depth: 0

@@ -24,6 +24,18 @@ jobs:
          git config user.name "$GITHUB_ACTOR"
          git config user.email "$GITHUB_ACTOR@users.noreply.github.com"

+      - name: Install Helm
+        uses: azure/setup-helm@1a275c3b69536ee54be43f2070a358922e12c8d4 # v4.3.1
+        with:
+          version: v3.16.0
+
+      - name: Add Helm repositories and update dependencies
+        run: |
+          helm repo add qdrant https://qdrant.github.io/qdrant-helm
+          helm repo add ollama https://otwld.github.io/ollama-helm
+          helm repo update
+          helm dependency build charts/nextcloud-mcp-server
+
      - name: Run chart-releaser
        uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0
        env:
@@ -0,0 +1,113 @@
+name: RAG Evaluation
+
+on:
+  workflow_dispatch:
+    inputs:
+      manual_path:
+        description: 'Path to Nextcloud User Manual PDF in Nextcloud'
+        required: false
+        default: 'Nextcloud Manual.pdf'
+      embedding_model:
+        description: 'OpenAI embedding model'
+        required: false
+        default: 'openai/text-embedding-3-small'
+      generation_model:
+        description: 'OpenAI generation model'
+        required: false
+        default: 'openai/gpt-4o-mini'
+
+jobs:
+  rag-evaluation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      models: read
+
+    steps:
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          submodules: 'true'
+
+      ###### Required to build OIDC App ######
+      - name: Set up php 8.4
+        uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
+        with:
+          php-version: 8.4
+          coverage: none
+
+      - name: Install OIDC app composer dependencies
+        run: |
+          cd third_party/oidc
+          composer install --no-dev
+      ###### Required to build OIDC App ######
+
+      - name: Run docker compose with vector sync
+        uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
+        with:
+          compose-file: "./docker-compose.yml"
+          up-flags: "--build"
+        env:
+          # Override MCP container environment for OpenAI + vector sync
+          VECTOR_SYNC_ENABLED: "true"
+          VECTOR_SYNC_SCAN_INTERVAL: "30"
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+
+      - name: Install the latest version of uv
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
+
+      - name: Wait for Nextcloud to be ready
+        run: |
+          echo "Waiting for Nextcloud..."
+          max_attempts=60
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "Service did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
+            sleep 5
+          done
+          echo "Nextcloud is ready."
+
+      - name: Wait for MCP server to be ready
+        run: |
+          echo "Waiting for MCP server..."
+          max_attempts=30
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8000/health | grep -q "200"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "MCP server did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: MCP not ready, sleeping for 2 seconds..."
+            sleep 2
+          done
+          echo "MCP server is ready."
+
+      - name: Run RAG evaluation tests
+        env:
+          NEXTCLOUD_HOST: "http://localhost:8080"
+          NEXTCLOUD_USERNAME: "admin"
+          NEXTCLOUD_PASSWORD: "admin"
+          RAG_MANUAL_PATH: ${{ inputs.manual_path }}
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+        run: |
+          uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO
+
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: rag-evaluation-results
+          path: |
+            pytest-results.xml
+          retention-days: 30
@@ -18,9 +18,9 @@ jobs:
      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
      - name: Install uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
      - name: Install Python 3.11
        run: uv python install 3.11
      - name: Build
@@ -9,9 +9,9 @@ jobs:
  linting:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
      - name: Check format
        run: |
          uv run --frozen ruff format --diff
@@ -27,7 +27,7 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        with:
          submodules: 'true'

@@ -52,10 +52,11 @@ jobs:
        uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
        with:
          compose-file: "./docker-compose.yml"
+          #compose-flags: "--profile qdrant"
          up-flags: "--build"

      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4

      - name: Install Playwright dependencies
        run: |
@@ -84,4 +85,4 @@ jobs:
          NEXTCLOUD_USERNAME: "admin"
          NEXTCLOUD_PASSWORD: "admin"
        run: |
-          uv run pytest -v --log-cli-level=WARN --ignore=tests/manual
+          uv run pytest -v --log-cli-level=WARN -m unit -m smoke
@@ -5,5 +5,14 @@ __pycache__/
 .env.local
 .env.*.local

+# Git
+worktrees/
+
+docker-compose.override.yml
+
 # Generated by pytest used to login users
 .nextcloud_oauth_*.json
+.playwright-mcp/
+
+# RAG Evaluation
+tests/rag_evaluation/fixtures/
@@ -1,6 +1,6 @@
-[submodule "oidc"]
-	path = third_party/oidc
-	url = https://github.com/cbcoutinho/oidc
 [submodule "third_party/oidc"]
 	path = third_party/oidc
 	url = https://github.com/cbcoutinho/oidc
+[submodule "third_party/notes"]
+	path = third_party/notes
+	url = https://github.com/cbcoutinho/notes
@@ -1,3 +1,410 @@
+## v0.48.0 (2025-11-23)
+
+### Feat
+
+- Add tag management methods to WebDAV client
+
+## v0.47.0 (2025-11-23)
+
+### Feat
+
+- Add OpenAI provider support for embeddings and generation
+
+## v0.46.2 (2025-11-22)
+
+### Fix
+
+- **smithery**: Enable JSON response format for scanner compatibility
+
+## v0.46.1 (2025-11-22)
+
+### Perf
+
+- Optimize vector viz search performance
+
+## v0.46.0 (2025-11-22)
+
+### Feat
+
+- Add Smithery CLI deployment support
+- Implement ADR-016 Smithery stateless deployment mode
+
+### Fix
+
+- **smithery**: Add JSON Schema metadata to mcp-config endpoint
+- **smithery**: Use container runtime pattern for config discovery
+- Add Smithery lifespan and auth mode detection
+
+## v0.45.0 (2025-11-22)
+
+### Feat
+
+- Add context expansion to semantic search with chunk overlap removal
+- Use Ollama native batch API in embed_batch()
+- Implement Qdrant placeholder state management
+- Switch files to use numeric IDs with file_path resolution
+- Implement per-chunk vector visualization with context expansion
+
+### Fix
+
+- Use alpha_composite for proper RGBA highlight blending
+- Remove pymupdf.layout.activate() to fix page_chunks behavior
+- Centralize PDF processing and generate separate images per chunk
+- Set is_placeholder=False in processor to fix search filtering
+- Increase placeholder staleness threshold to 5x scan interval
+- Add placeholder staleness check to prevent duplicate processing
+- Use empty SparseVector instead of None for placeholders
+- Return empty array instead of null for query_coords when no results
+- Align PDF text extraction between indexing and context expansion
+- Update models and viz to use int-only doc_id
+- Reconstruct full content for notes to match indexed offsets
+- Add async/await, PDF metadata, and type safety fixes
+
+### Refactor
+
+- Simplify PDF text extraction with single to_markdown call
+
+### Perf
+
+- Optimize PDF processing with parallel extraction and single-render highlights
+
+## v0.44.1 (2025-11-21)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.22,<1.23
+
+## v0.44.0 (2025-11-19)
+
+### Feat
+
+- Improve vector visualization with static assets and fixes
+- Redesign UI to match Nextcloud ecosystem aesthetic
+
+### Fix
+
+- Improve 3D plot rendering with explicit dimensions and window resize support
+- Preserve 3D plot camera and improve documentation
+- Preserve 3D plot camera position and fix CSS loading
+
+## v0.43.0 (2025-11-18)
+
+### Feat
+
+- Replace custom document chunker with LangChain MarkdownTextSplitter
+
+## v0.42.0 (2025-11-17)
+
+### Feat
+
+- **viz**: Add dual-score display and improve UI controls
+
+## v0.41.0 (2025-11-17)
+
+### Feat
+
+- add configurable fusion algorithms for BM25 hybrid search
+- add chunk position tracking to vector indexing and search
+- add vector viz template and chunk context endpoint
+
+### Fix
+
+- prevent infinite loop in DocumentChunker with position tracking
+- Relax SearchResult validation to support DBSF fusion scores > 1.0
+
+## v0.40.0 (2025-11-16)
+
+### Feat
+
+- add unified provider architecture with Amazon Bedrock support
+
+### Fix
+
+- suppress Starlette middleware type warnings in ty checker
+
+## v0.39.0 (2025-11-16)
+
+### Feat
+
+- Implement BM25 hybrid search with native Qdrant RRF fusion
+
+### Fix
+
+- Handle named vectors in visualization and semantic search
+- Update vizApp to use bm25_hybrid algorithm and remove deprecated weights
+- Update viz routes to use BM25 hybrid search after refactor
+
+## v0.38.0 (2025-11-16)
+
+### Feat
+
+- add concurrent uploads and --force flag to upload command
+- implement RAG evaluation framework with CLI tooling
+
+### Fix
+
+- download qrels from BEIR ZIP instead of HuggingFace
+
+### Refactor
+
+- migrate asyncio to anyio for consistent structured concurrency
+- replace httpx client with NextcloudClient in upload command
+
+### Perf
+
+- Eliminate double-fetching in semantic search sampling
+- fix vector viz search performance and visual encoding
+- make note deletion concurrent in upload --force
+
+## v0.37.0 (2025-11-16)
+
+### Feat
+
+- Add OpenTelemetry tracing to @instrument_tool decorator
+
+## v0.36.0 (2025-11-15)
+
+### BREAKING CHANGE
+
+- Search algorithms now require Qdrant to be populated.
+Vector sync must be enabled and documents indexed for search to work.
+
+### Feat
+
+- Normalize hybrid search RRF scores to 0-1 range
+- Enhance vector visualization UI and parallelize search verification
+- Add Vector Viz tab to app home page
+- Add vector visualization pane with multi-select document types
+- Implement custom PCA to remove sklearn dependency
+- Add multi-document Protocol with cross-app search support
+- Update nc_semantic_search tool with algorithm selection
+- Implement unified search algorithm module
+
+### Fix
+
+- Reorder tabs and fix viz pane session access
+
+### Refactor
+
+- Optimize Nextcloud access verification with centralized filtering
+- Make all search algorithms query Qdrant payload, not Nextcloud
+
+### Perf
+
+- Exclude vector-sync status polling from distributed tracing
+
+## v0.35.0 (2025-11-15)
+
+### Feat
+
+- Enable SSE transport for mcp service and update test fixtures
+
+## v0.34.2 (2025-11-13)
+
+### Fix
+
+- Use NEXTCLOUD_OIDC_CLIENT_ID/SECRET env vars consistently
+
+## v0.34.1 (2025-11-13)
+
+### Fix
+
+- return all notes when search query is empty
+
+## v0.34.0 (2025-11-13)
+
+### Feat
+
+- Complete Phase 5 - Instrument all 93 MCP tools
+- Add instrumentation decorator and apply to notes tools (Phase 5)
+- Add OAuth token and database metrics (Phases 3-4)
+- Add metrics instrumentation for queue, health, and database operations
+
+## v0.33.1 (2025-11-13)
+
+### Fix
+
+- Move grafana_folder from labels to annotations
+
+## v0.33.0 (2025-11-13)
+
+### Feat
+
+- Add Grafana dashboard and vector sync metric instrumentation
+
+## v0.32.1 (2025-11-12)
+
+### Fix
+
+- add dynamic dimension detection for Ollama embedding models
+
+## v0.32.0 (2025-11-11)
+
+### Feat
+
+- **ollama**: Pull model on startup if not available in ollama
+- add dynamic vector sync status updates with htmx polling
+- add webhook management UI and BeforeNodeDeletedEvent support
+- validate Nextcloud webhook schemas and document findings
+
+### Fix
+
+- improve webapp tab UI with CSS Grid and viewport-filling container
+
+### Refactor
+
+- move webapp from /user/page to /app
+- consolidate database storage for webhooks and OAuth tokens
+
+## v0.31.1 (2025-11-10)
+
+### Refactor
+
+- simplify OpenTelemetry tracing configuration
+
+## v0.31.0 (2025-11-10)
+
+### Feat
+
+- skip tracing for health and metrics endpoints
+
+### Fix
+
+- add retry logic for ETag conflicts in category change test
+- optimize Notes API pagination with pruneBefore parameter
+
+## v0.30.0 (2025-11-10)
+
+### Feat
+
+- **helm**: Add document chunking configuration
+- **vector**: Add configurable chunk size and overlap for document embedding
+- **vector**: Support multiple embedding models with auto-generated collection names
+
+### Fix
+
+- Support in-memory Qdrant for CI testing
+
+## v0.29.2 (2025-11-09)
+
+### Fix
+
+- **helm**: Set default strategy to Recreate
+
+## v0.29.1 (2025-11-09)
+
+### Fix
+
+- **observability**: isolate metrics endpoint to dedicated port
+
+## v0.29.0 (2025-11-09)
+
+### Feat
+
+- **helm**: Add observability support with ServiceMonitor and Grafana dashboard
+
+### Fix
+
+- **readiness**: Only check external Qdrant in network mode
+
+## v0.28.0 (2025-11-09)
+
+### Feat
+
+- **observability**: Add comprehensive monitoring with Prometheus and OpenTelemetry
+
+### Fix
+
+- **vector**: Handle missing 'modified' field in notes gracefully
+
+## v0.27.3 (2025-11-09)
+
+### Fix
+
+- **ci**: Use helm dependency build instead of update to use Chart.lock
+
+## v0.27.2 (2025-11-09)
+
+### Fix
+
+- **helm**: update Qdrant dependency condition to match new mode structure
+
+## v0.27.1 (2025-11-09)
+
+### Fix
+
+- **ci**: add Helm repository setup to chart release workflow
+
+## v0.27.0 (2025-11-09)
+
+### Feat
+
+- **helm**: add Qdrant local mode support with three deployment options [skip ci]
+- add Qdrant local mode support with in-memory and persistent storage
+- implement ADR-009 - refactor semantic search to use generic semantic:read scope
+- implement MCP sampling for semantic search RAG (ADR-008)
+- add optional vector database and semantic search to helm chart
+- add vector sync processing status to /app endpoint
+- implement semantic search tool and fix vector sync issues (ADR-007 Phase 3)
+- implement vector sync scanner and processor (ADR-007 Phase 2)
+
+### Fix
+
+- implement deletion grace period and vector sync status tool
+- remove unnecessary urllib3<2.0 constraint
+- integrate vector sync tasks with Starlette lifespan for streamable-http
+
+### Refactor
+
+- migrate vector sync from asyncio.Queue to anyio memory object streams
+- update to Qdrant query_points API and fix Playwright Keycloak login
+
+## v0.26.1 (2025-11-08)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.21,<1.22
+
+## v0.26.0 (2025-11-08)
+
+### Feat
+
+- add real elicitation integration test with python-sdk MCP client
+- unify session architecture and enhance login status visibility
+
+### Fix
+
+- Consolidate OAuth callbacks and implement PKCE for all flows
+
+## v0.25.0 (2025-11-05)
+
+### BREAKING CHANGE
+
+- All OAuth deployments must be reconfigured to specify
+resource URIs (NEXTCLOUD_MCP_SERVER_URL and NEXTCLOUD_RESOURCE_URI) and
+choose between multi-audience or token exchange mode.
+
+### Feat
+
+- Implement ADR-005 unified token verifier to eliminate token passthrough vulnerability
+
+### Fix
+
+- Implement proper OAuth resource parameters and PRM-based discovery
+- Simplify token verifier to be RFC 7519 compliant
+- Use Keycloak client ID for NEXTCLOUD_RESOURCE_URI in token exchange
+- Correct OAuth token audience validation for multi-audience mode
+
+### Refactor
+
+- Eliminate duplicate validation logic in UnifiedTokenVerifier
+
+## v0.24.1 (2025-11-04)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.20,<1.21
+
 ## v0.24.0 (2025-11-04)

 ### Feat
@@ -5,23 +5,29 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 ## Coding Conventions

 ### async/await Patterns
- **Use anyio + asyncio hybrid** - Both libraries are available
+- **Use anyio for all async operations** - Provides structured concurrency
  - pytest runs in `anyio` mode (`anyio_mode = "auto"` in pyproject.toml)
-  - asyncio used in auth modules (refresh_token_storage.py, token_exchange.py, token_broker.py)
-  - anyio used in calendar.py, client_registration.py, app.py
+  - Use `anyio.create_task_group()` for concurrent execution (NOT `asyncio.gather()`)
+  - Use `anyio.Lock()` for synchronization primitives (NOT `asyncio.Lock()`)
+  - Use `anyio.run()` for entry points (NOT `asyncio.run()`)
  - Prefer standard async/await syntax without explicit library imports when possible
+  - Examples: app.py, search/hybrid.py, search/verification.py, auth/token_broker.py

 ### Type Hints
 - **Use Python 3.10+ union syntax**: `str | None` instead of `Optional[str]`
 - **Use lowercase generics**: `dict[str, Any]` instead of `Dict[str, Any]`
 - **Type all function signatures** - Parameters and return types
- **No explicit type checker configured** - Ruff handles linting only
+- **Type checker**: `ty` is configured for static type checking
+  ```bash
+  uv run ty check -- nextcloud_mcp_server
+  ```

 ### Code Quality
- **Run ruff before committing**:
+- **Run ruff and ty before committing**:
  ```bash
  uv run ruff check
  uv run ruff format
+  uv run ty check -- nextcloud_mcp_server
  ```
 - **Ruff configuration** in pyproject.toml (extends select: ["I"] for import sorting)

@@ -55,8 +61,60 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 - `nextcloud_mcp_server/server/` - MCP tool/resource definitions
 - `nextcloud_mcp_server/auth/` - OAuth/OIDC authentication
 - `nextcloud_mcp_server/models/` - Pydantic response models
+- `nextcloud_mcp_server/providers/` - Unified LLM provider infrastructure (embeddings + generation)
 - `tests/` - Layered test suite (unit, smoke, integration, load)

+### Provider Architecture (ADR-015)
+
+**Unified Provider System** for embeddings and text generation:
+
+**Location:** `nextcloud_mcp_server/providers/`
+- `base.py` - `Provider` ABC with optional capabilities
+- `registry.py` - Auto-detection and factory pattern
+- `ollama.py` - Ollama provider (embeddings + generation)
+- `anthropic.py` - Anthropic provider (generation only)
+- `bedrock.py` - Amazon Bedrock provider (embeddings + generation)
+- `simple.py` - Simple in-memory provider (embeddings only, fallback)
+
+**Usage:**
+```python
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Auto-detects from environment
+
+# Check capabilities
+if provider.supports_embeddings:
+    embeddings = await provider.embed_batch(texts)
+
+if provider.supports_generation:
+    text = await provider.generate("prompt", max_tokens=500)
+```
+
+**Environment Variables:**
+
+Bedrock:
+- `AWS_REGION` - AWS region (e.g., "us-east-1")
+- `BEDROCK_EMBEDDING_MODEL` - Embedding model ID (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL` - Generation model ID (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` - Optional, uses AWS credential chain
+
+Ollama:
+- `OLLAMA_BASE_URL` - API URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL` - Embedding model (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL` - Generation model (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL` - SSL verification (default: "true")
+
+Simple (fallback, no config needed):
+- `SIMPLE_EMBEDDING_DIMENSION` - Dimension (default: 384)
+
+**Auto-Detection Priority:** Bedrock → Ollama → Simple
+
+**Backward Compatibility:**
+- Old code using `nextcloud_mcp_server.embedding.get_embedding_service()` still works
+- `EmbeddingService` now wraps `get_provider()` internally
+
+**For Details:** See `docs/ADR-015-unified-provider-architecture.md`
+
 ## Development Commands (Quick Reference)

 ### Testing
@@ -167,23 +225,35 @@ docker compose exec db mariadb -u root -ppassword nextcloud -e \

 ### Progressive Consent Architecture (ADR-004)

-**Status**: Always enabled in OAuth mode (default)
+**Important**: Progressive consent is a *mechanism* for granting access, not a feature flag. The architecture is always present in OAuth mode. Whether provisioning tools are available is controlled by `ENABLE_OFFLINE_ACCESS`.

 **What is Progressive Consent?**
 - Dual OAuth flow architecture that separates client authentication (Flow 1) from resource provisioning (Flow 2)
 - Flow 1: MCP client authenticates directly to IdP with resource scopes (notes:*, calendar:*, etc.)
  - Token audience: "mcp-server"
  - Client receives resource-scoped token for MCP session
- Flow 2: Server explicitly provisions Nextcloud access via separate login
+- Flow 2: Server explicitly provisions Nextcloud access via separate login (only when `ENABLE_OFFLINE_ACCESS=true`)
  - Server requests: openid, profile, email, offline_access
  - Token audience: "nextcloud"
  - Server receives refresh token for offline access
  - Client never sees this token
 - Provides clear separation between session tokens and offline access tokens

+**Modes:**
+- **Pass-through mode** (`ENABLE_OFFLINE_ACCESS=false`, default):
+  - No Flow 2 provisioning
+  - Server uses client's token to access Nextcloud (pass-through)
+  - No provisioning tools available
+  - Suitable for stateless, client-driven operations
+- **Offline access mode** (`ENABLE_OFFLINE_ACCESS=true`):
+  - Flow 2 provisioning available
+  - Server stores refresh tokens for background operations
+  - Provisioning tools available: `provision_nextcloud_access`, `check_logged_in`
+  - Suitable for background jobs and server-initiated operations
+
 **When to use OAuth mode:**
 - Multi-user deployments
- Background jobs requiring offline access
+- Background jobs requiring offline access (with `ENABLE_OFFLINE_ACCESS=true`)
 - Enhanced security with separate authorization contexts
 - Explicit user control over resource access

@@ -212,6 +282,82 @@ docker compose exec db mariadb -u root -ppassword nextcloud -e \

 **Testing**: Extract `data["results"]` from MCP responses, not `data` directly.

+## MCP Sampling for RAG (ADR-008)
+
+**What is MCP Sampling?**
+MCP sampling allows servers to request LLM completions from their clients. This enables Retrieval-Augmented Generation (RAG) patterns where the server retrieves context and the client's LLM generates answers.
+
+**When to use sampling:**
+- Generating natural language answers from retrieved documents
+- Synthesizing information from multiple sources
+- Creating summaries with citations
+
+**Implementation Pattern** (see ADR-008 for details):
+
+```python
+from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
+
+@mcp.tool()
+@require_scopes("notes:read")
+async def nc_notes_semantic_search_answer(
+    query: str, ctx: Context, limit: int = 5, max_answer_tokens: int = 500
+) -> SamplingSearchResponse:
+    # 1. Retrieve documents
+    search_response = await nc_notes_semantic_search(query, ctx, limit)
+
+    # 2. Check for no results (don't waste sampling call)
+    if not search_response.results:
+        return SamplingSearchResponse(
+            query=query,
+            generated_answer="No relevant documents found.",
+            sources=[], total_found=0, success=True
+        )
+
+    # 3. Construct prompt with retrieved context
+    prompt = f"{query}\n\nDocuments:\n{format_sources(search_response.results)}\n\nProvide answer with citations."
+
+    # 4. Request LLM completion via sampling
+    try:
+        result = await ctx.session.create_message(
+            messages=[SamplingMessage(role="user", content=TextContent(type="text", text=prompt))],
+            max_tokens=max_answer_tokens,
+            temperature=0.7,
+            model_preferences=ModelPreferences(
+                hints=[ModelHint(name="claude-3-5-sonnet")],
+                intelligencePriority=0.8,
+                speedPriority=0.5,
+            ),
+            include_context="thisServer",
+        )
+
+        return SamplingSearchResponse(
+            query=query,
+            generated_answer=result.content.text,
+            sources=search_response.results,
+            model_used=result.model,
+            stop_reason=result.stopReason,
+            success=True
+        )
+    except Exception as e:
+        # Fallback: Return documents without generated answer
+        return SamplingSearchResponse(
+            query=query,
+            generated_answer=f"[Sampling unavailable: {e}]\n\nFound {len(search_response.results)} documents.",
+            sources=search_response.results,
+            search_method="semantic_sampling_fallback",
+            success=True
+        )
+```
+
+**Key Points**:
+- **No server-side LLM**: Server has no API keys, client controls which model is used
+- **Graceful degradation**: Tool always returns useful results even if sampling fails
+- **User control**: MCP clients SHOULD prompt users to approve sampling requests
+- **No results optimization**: Skip sampling call when no documents found
+- **Fixed prompts**: Prompts are not user-configurable to avoid injection risks
+
+**Reference**: See `nc_notes_semantic_search_answer` in `nextcloud_mcp_server/server/notes.py:517` and ADR-008 for complete implementation.
+
 ## Testing Best Practices (MANDATORY)

 ### Always Run Tests
@@ -303,3 +449,7 @@ docker compose exec app php occ user_oidc:provider keycloak
 - `docs/configuration.md` - Configuration options
 - `docs/authentication.md` - Authentication modes
 - `docs/running.md` - Running the server
+
+**For additional information regarding MCP during development, see**:
+- `../../Software/modelcontextprotocol/` - MCP spec
+- `../../Software/python-sdk/` - Python MCP SDK
@@ -1,16 +1,24 @@
-FROM ghcr.io/astral-sh/uv:0.9.7-python3.11-alpine@sha256:0006b77df7ebf46e68959fdc8d3af9d19f1adfae8c2e7e77907ad257e5d05be4
+FROM docker.io/library/python:3.12-slim-trixie@sha256:b43ff04d5df04ad5cabb80890b7ef74e8410e3395b19af970dcd52d7a4bff921
+
+COPY --from=ghcr.io/astral-sh/uv:0.9.11@sha256:5aa820129de0a600924f166aec9cb51613b15b68f1dcd2a02f31a500d2ede568 /uv /uvx /bin/

 # Install dependencies
 # 1. git (required for caldav dependency from git)
 # 2. sqlite for development with token db
-RUN apk add --no-cache git sqlite
+RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
+    git \
+    tesseract-ocr \
+    sqlite3 && apt clean

 WORKDIR /app

 COPY . .

-RUN uv sync --locked --no-dev
+RUN uv sync --locked --no-dev --no-editable --no-cache

 ENV PYTHONUNBUFFERED=1
+ENV VIRTUAL_ENV=/app/.venv
+ENV PATH=/app/.vnev/bin:$PATH
+ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

 ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
@@ -0,0 +1,44 @@
+# Dockerfile for Smithery stateless deployment
+# ADR-016: Stateless mode for multi-user public Nextcloud instances
+#
+# This image excludes:
+# - Vector database dependencies (qdrant-client)
+# - Background sync workers
+# - Admin UI routes (/app)
+# - Semantic search tools
+#
+# Features included:
+# - Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
+# - Per-session app password authentication
+# - Multi-user support via Smithery session config
+
+FROM docker.io/library/python:3.12-slim-trixie@sha256:b43ff04d5df04ad5cabb80890b7ef74e8410e3395b19af970dcd52d7a4bff921
+
+WORKDIR /app
+
+# Install uv for fast dependency management
+COPY --from=ghcr.io/astral-sh/uv:0.9.11@sha256:5aa820129de0a600924f166aec9cb51613b15b68f1dcd2a02f31a500d2ede568 /uv /uvx /bin/
+
+# Install dependencies
+# 1. git (required for caldav dependency from git)
+# 2. sqlite for development with token db
+RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
+    git
+
+# Copy project files
+COPY . .
+
+RUN uv sync --locked --no-dev --no-editable --no-cache
+
+# Set Smithery mode environment variables
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081 by default
+EXPOSE 8081
+
+# Health check endpoint
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD uv run python -c "import httpx; httpx.get('http://localhost:${PORT:-8081}/health/live').raise_for_status()"
+
+CMD ["/app/.venv/bin/smithery-main"]
@@ -1,285 +1,160 @@
+```markdown
+<p align="center">
+  <img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128">
+</p>
+
 # Nextcloud MCP Server

+[![smithery badge](https://smithery.ai/badge/@cbcoutinho/nextcloud-mcp-server)](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
 [![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)

-**Enable AI assistants to interact with your Nextcloud instance.**
+**A production-ready MCP server that connects AI assistants to your Nextcloud instance.**

-The Nextcloud MCP (Model Context Protocol) server allows Large Language Models like Claude, GPT, and Gemini to interact with your Nextcloud data through a secure API. Create notes, manage calendars, organize contacts, work with files, and more - all through natural language.
+Enable Large Language Models like Claude, GPT, and Gemini to interact with your Nextcloud data through a secure API. Create notes, manage calendars, organize contacts, work with files, and more - all through natural language conversations.
+
+This is a **dedicated standalone MCP server** designed for external MCP clients like Claude Code and IDEs. It runs independently of Nextcloud (Docker, VM, Kubernetes, or local) and provides deep CRUD operations across Nextcloud apps.

 > [!NOTE]
-> **Nextcloud has two ways to enable AI access:** Nextcloud provides [Context Agent](https://github.com/nextcloud/context_agent), an AI agent backend that powers the [Assistant](https://github.com/nextcloud/assistant) app and allows AI to interact with Nextcloud apps like Calendar, Talk, and Contacts. Context Agent runs as an ExApp inside Nextcloud and also _[exposes an MCP server](https://docs.nextcloud.com/server/stable/admin_manual/ai/app_context_agent.html#using-nextcloud-mcp-server)_ for external MCP clients.
->
-> This project (Nextcloud MCP Server) is a **dedicated standalone MCP server** designed specifically for external MCP clients like Claude Code and IDEs, with deep CRUD operations and OAuth support. It does not require any additional AI-features to be enabled in Nextcloud beyond the apps that you intend to interact with.
-
-### High-level Comparison: Nextcloud MCP Server vs. Nextcloud AI Stack
-
-| Aspect | **Nextcloud MCP Server**<br/>(This Project) | **Nextcloud AI Stack**<br/>(Assistant + Context Agent) |
-|--------|---------------------------------------------|--------------------------------------------------------|
-| **Purpose** | External MCP client access to Nextcloud | AI assistance within Nextcloud UI |
-| **Deployment** | Standalone (Docker, VM, K8s) | Inside Nextcloud (ExApp via AppAPI) |
-| **Primary Users** | Claude Code, IDEs, external developers | Nextcloud end users via Assistant app |
-| **Authentication** | OAuth2/OIDC or Basic Auth | Session-based (integrated) |
-| **Notes Support** | ✅ Full CRUD + search (7 tools) | ❌ Not implemented |
-| **Calendar** | ✅ Full CalDAV + tasks (20+ tools) | ✅ Events, free/busy, tasks (4 tools) |
-| **Contacts** | ✅ Full CardDAV (8 tools) | ✅ Find person, current user (2 tools) |
-| **Files (WebDAV)** | ✅ Full filesystem access (12 tools) | ✅ Read, folder tree, sharing (3 tools) |
-| **Document Processing** | ✅ OCR with progress (PDF, DOCX, images) | ❌ Not implemented |
-| **Deck** | ✅ Full project management (15 tools) | ✅ Basic board/card ops (2 tools) |
-| **Tables** | ✅ Row operations (5 tools) | ❌ Not implemented |
-| **Cookbook** | ✅ Full recipe management (13 tools) | ❌ Not implemented |
-| **Talk** | ❌ Not implemented | ✅ Messages, conversations (4 tools) |
-| **Mail** | ❌ Not implemented | ✅ Send email (2 tools) |
-| **AI Features** | ❌ Not implemented | ✅ Image gen, transcription, doc gen (4 tools) |
-| **Web/Maps** | ❌ Not implemented | ✅ Search, weather, transit (5 tools) |
-| **MCP Resources** | ✅ Structured data URIs | ❌ Not supported |
-| **External MCP** | ❌ Pure server | ✅ Consumes external MCP servers |
-| **Safety Model** | Client-controlled | Built-in safe/dangerous distinction |
-| **Best For** | • Deep CRUD operations<br/>• External integrations<br/>• OAuth security<br/>• IDE/editor integration | • AI-driven actions in Nextcloud UI<br/>• Multi-service orchestration<br/>• User task automation<br/>• MCP aggregation hub |
-
-See our [detailed comparison](docs/comparison-context-agent.md) for architecture diagrams, workflow examples, and guidance on when to use each approach.
-
-Want to see another Nextcloud app supported? [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) or contribute a pull request!
-
-### Authentication
-
-| Mode | Security | Best For |
-|------|----------|----------|
-| **OAuth2/OIDC** ⚠️ **Experimental** | 🔒 High | Testing, evaluation (requires patch for app-specific APIs) |
-| **Basic Auth** ✅ | Lower | Development, testing, production |
-
-> [!IMPORTANT]
-> **OAuth is experimental** and requires a manual patch to the `user_oidc` app for full functionality:
-> - **Required patch**: `user_oidc` app needs modifications for Bearer token support ([issue #1221](https://github.com/nextcloud/user_oidc/issues/1221))
-> - **Impact**: Without the patch, most app-specific APIs (Notes, Calendar, Contacts, Deck, etc.) will fail with 401 errors
-> - **What works without patches**: OAuth flow, PKCE support (with `oidc` v1.10.0+), OCS APIs
-> - **Production use**: Wait for upstream patch to be merged into official releases
->
-> See [OAuth Upstream Status](docs/oauth-upstream-status.md) for detailed information on required patches and workarounds.
-
-OAuth2/OIDC provides secure, per-user authentication with access tokens. See [Authentication Guide](docs/authentication.md) for details.
+> **Looking for AI features inside Nextcloud?** Nextcloud also provides [Context Agent](https://github.com/nextcloud/context_agent), which powers the Assistant app and runs as an ExApp inside Nextcloud. See [docs/comparison-context-agent.md](docs/comparison-context-agent.md) for a detailed comparison of use cases.

 ## Quick Start

-### 1. Install
+The fastest way to get started is via [Smithery](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server) - no Docker or self-hosting required:
+
+1. Visit the [Smithery marketplace page](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
+2. Click "Deploy" and configure:
+   - **Nextcloud URL**: Your Nextcloud instance (e.g., `https://cloud.example.com`)
+   - **Username**: Your Nextcloud username
+   - **App Password**: Generate one in Nextcloud → Settings → Security → Devices & sessions
+
+> [!NOTE]
+> Smithery runs in stateless mode without semantic search. For full features, use [Docker](#docker-self-hosted) or see [ADR-016](docs/ADR-016-smithery-stateless-deployment.md).
+
+## Docker (Self-Hosted)
+
+For full features including semantic search, run with Docker:

 ```bash
-# Clone the repository
-git clone https://github.com/cbcoutinho/nextcloud-mcp-server.git
-cd nextcloud-mcp-server
-
-# Install with uv (recommended)
-uv sync
-
-# Or using Docker
-docker pull ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
-
-# Or deploy to Kubernetes with Helm
-helm repo add nextcloud-mcp https://cbcoutinho.github.io/nextcloud-mcp-server
-helm repo update
-helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \
-  --set nextcloud.host=https://cloud.example.com \
-  --set auth.basic.username=myuser \
-  --set auth.basic.password=mypassword
-```
-
-See [Installation Guide](docs/installation.md) for detailed instructions, or [Helm Chart README](charts/nextcloud-mcp-server/README.md) for Kubernetes deployment.
-
-### 2. Configure
-
-Create a `.env` file:
-
-```bash
-# Copy the sample
-cp env.sample .env
-```
-
-**For Basic Auth (recommended for most users):**
-```dotenv
+# 1. Create a minimal configuration
+cat > .env << EOF
 NEXTCLOUD_HOST=https://your.nextcloud.instance.com
 NEXTCLOUD_USERNAME=your_username
 NEXTCLOUD_PASSWORD=your_app_password
-```
+EOF

-**For OAuth (experimental - requires patches):**
-```dotenv
-NEXTCLOUD_HOST=https://your.nextcloud.instance.com
-```
-
-See [Configuration Guide](docs/configuration.md) for all options.
-
-### 3. Set Up Authentication
-
-**Basic Auth Setup (recommended):**
-1. Create an app password in Nextcloud (Settings → Security → Devices & sessions)
-2. Add credentials to `.env` file
-3. Start the server
-
-**OAuth Setup (experimental):**
-1. Install Nextcloud OIDC apps (`oidc` v1.10.0+ + `user_oidc`)
-2. **Apply required patch** to `user_oidc` app for Bearer token support (see [OAuth Upstream Status](docs/oauth-upstream-status.md))
-3. Enable dynamic client registration or create an OIDC client with id & secret
-4. Configure Bearer token validation in `user_oidc`
-5. Start the server
-
-See [OAuth Quick Start](docs/quickstart-oauth.md) for 5-minute setup or [OAuth Setup Guide](docs/oauth-setup.md) for detailed instructions.
-
-### 4. Run the Server
-
-```bash
-# Load environment variables
-export $(grep -v '^#' .env | xargs)
-
-# Start with Basic Auth (default)
-uv run nextcloud-mcp-server
-
-# Or start with OAuth (experimental - requires patches)
-uv run nextcloud-mcp-server --oauth
-
-# Or with Docker
+# 2. Start the server
 docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
+
+# 3. Test the connection
+curl http://127.0.0.1:8000/health/ready
+
+# 4. Connect to the endpoint
+http://127.0.0.1:8000/sse
+
+# Or with --transport streamable-http
+http://127.0.0.1:8000/mcp
 ```

-The server starts on `http://127.0.0.1:8000` by default.
+**Next Steps:**
+- Connect your MCP client (Claude Desktop, IDEs, `mcp dev`, etc.)
+- See [docs/installation.md](docs/installation.md) for other deployment options (local, Kubernetes)

-See [Running the Server](docs/running.md) for more options.
+## Key Features

-### 5. Connect an MCP Client
+- **90+ MCP Tools** - Comprehensive API coverage across 8 Nextcloud apps
+- **MCP Resources** - Structured data URIs for browsing Nextcloud data
+- **Semantic Search (Experimental)** - Optional vector-powered search for Notes (requires Qdrant + Ollama)
+- **Document Processing** - OCR and text extraction from PDFs, DOCX, images with progress notifications
+- **Flexible Deployment** - Docker, Kubernetes (Helm), VM, or local installation
+- **Production-Ready Auth** - Basic Auth with app passwords (recommended) or OAuth2/OIDC (experimental)
+- **Multiple Transports** - SSE, HTTP, and streamable-http support

-Test with MCP Inspector:
+## Supported Apps

-```bash
-uv run mcp dev
-```
+| App | Tools | Capabilities |
+|-----|-------|--------------|
+| **Notes** | 7 | Full CRUD, keyword search, semantic search |
+| **Calendar** | 20+ | Events, todos (tasks), recurring events, attendees, availability |
+| **Contacts** | 8 | Full CardDAV support, address books |
+| **Files (WebDAV)** | 12 | Filesystem access, OCR/document processing |
+| **Deck** | 15 | Boards, stacks, cards, labels, assignments |
+| **Cookbook** | 13 | Recipe management, URL import (schema.org) |
+| **Tables** | 5 | Row operations on Nextcloud Tables |
+| **Sharing** | 10+ | Create and manage shares |
+| **Semantic Search** | 2+ | Vector search for Notes (experimental, opt-in, requires infrastructure) |

-Or connect from:
- Claude Desktop
- Any MCP-compatible client
+Want to see another Nextcloud app supported? [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) or contribute a pull request!
+
+## Authentication
+
+> [!IMPORTANT]
+> **OAuth2/OIDC is experimental** and requires a manual patch to the `user_oidc` app:
+> - **Required patch**: Bearer token support ([issue #1221](https://github.com/nextcloud/user_oidc/issues/1221))
+> - **Impact**: Without the patch, most app-specific APIs fail with 401 errors
+> - **Recommendation**: Use Basic Auth for production until upstream patches are merged
+>
+> See [docs/oauth-upstream-status.md](docs/oauth-upstream-status.md) for patch status and workarounds.
+
+**Recommended:** Basic Auth with app-specific passwords provides secure, production-ready authentication. See [docs/authentication.md](docs/authentication.md) for setup details and OAuth configuration.
+
+### Authentication Modes
+
+The server supports two authentication modes:
+
+**Single-User Mode (BasicAuth):**
+- One set of credentials shared by all MCP clients
+- Simple setup: username + app password in environment variables
+- All clients access Nextcloud as the same user
+- Best for: Personal use, development, single-user deployments
+
+**Multi-User Mode (OAuth):**
+- Each MCP client authenticates separately with their own Nextcloud account
+- Per-user scopes and permissions (clients only see tools they're authorized for)
+- More secure: tokens expire, credentials never shared with server
+- Best for: Teams, multi-user deployments, production environments with multiple users
+
+See [docs/authentication.md](docs/authentication.md) for detailed setup instructions.
+
+## Semantic Search
+
+The server provides an experimental RAG pipeline to enable _Semantic Search_ that enables MCP clients to find information in Nextcloud based on **meaning** rather than just keywords. Instead of matching "machine learning" only when those exact words appear, it understands that "neural networks," "AI models," and "deep learning" are semantically related concepts.
+
+**Example:**
+- **Keyword search**: Query "car" only finds notes containing "car"
+- **Semantic search**: Query "car" also finds notes about "automobile," "vehicle," "sedan," "transportation"
+
+This enables natural language queries and helps discover related content across your Nextcloud notes.
+
+> [!NOTE]
+> **Semantic Search is experimental and opt-in:**
+> - Disabled by default (`VECTOR_SYNC_ENABLED=false`)
+> - Currently supports Notes app only (multi-app support planned)
+> - Requires additional infrastructure: vector database + embedding service
+> - Answer generation (`nc_semantic_search_answer`) requires MCP client sampling support
+>
+> See [docs/semantic-search-architecture.md](docs/semantic-search-architecture.md) for architecture details and [docs/configuration.md](docs/configuration.md) for setup instructions.

 ## Documentation

 ### Getting Started
- **[Installation](docs/installation.md)** - Install the server
- **[Configuration](docs/configuration.md)** - Environment variables and settings
- **[Authentication](docs/authentication.md)** - OAuth vs BasicAuth
- **[Running the Server](docs/running.md)** - Start and manage the server
+- **[Installation](docs/installation.md)** - Docker, Kubernetes, local, or VM deployment
+- **[Configuration](docs/configuration.md)** - Environment variables and advanced options
+- **[Authentication](docs/authentication.md)** - Basic Auth vs OAuth2/OIDC setup
+- **[Running the Server](docs/running.md)** - Start, manage, and troubleshoot

-### Architecture
- **[Comparison with Context Agent](docs/comparison-context-agent.md)** - How this MCP server differs from Nextcloud's Context Agent
+### Features
+- **[App Documentation](docs/)** - Notes, Calendar, Contacts, WebDAV, Deck, Cookbook, Tables
+- **[Document Processing](docs/configuration.md#document-processing)** - OCR and text extraction setup
+- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes only, opt-in)
+- **[Vector Sync UI Guide](docs/user-guide/vector-sync-ui.md)** - Browser interface for semantic search visualization and testing

-### OAuth Documentation (Experimental)
- **[OAuth Quick Start](docs/quickstart-oauth.md)** - 5-minute setup guide
- **[OAuth Setup Guide](docs/oauth-setup.md)** - Detailed setup instructions
- **[OAuth Architecture](docs/oauth-architecture.md)** - How OAuth works
- **[OAuth Troubleshooting](docs/oauth-troubleshooting.md)** - OAuth-specific issues
- **[Upstream Status](docs/oauth-upstream-status.md)** - **Required patches and PRs** ⚠️
-
-### Reference
+### Advanced Topics
+- **[OAuth Architecture](docs/oauth-architecture.md)** - How OAuth works (experimental)
+- **[OAuth Quick Start](docs/quickstart-oauth.md)** - 5-minute OAuth setup
+- **[OAuth Setup Guide](docs/oauth-setup.md)** - Detailed OAuth configuration
 - **[Troubleshooting](docs/troubleshooting.md)** - Common issues and solutions
-
-### App-Specific Documentation
- [Notes API](docs/notes.md)
- [Calendar (CalDAV)](docs/calendar.md)
- [Contacts (CardDAV)](docs/contacts.md)
- [Cookbook](docs/cookbook.md)
- [Deck](docs/deck.md)
- [Tables](docs/table.md)
- [WebDAV](docs/webdav.md)
-
-## MCP Tools & Resources
-
-The server exposes Nextcloud functionality through MCP tools (for actions) and resources (for data browsing).
-
-### Tools
-
-The server provides 90+ tools across 8 Nextcloud apps. When using OAuth, tools are dynamically filtered based on your granted scopes.
-
-For a complete list of all supported OAuth scopes and their descriptions, see [OAuth Scopes Documentation](docs/oauth-architecture.md#oauth-scopes).
-
-#### Available Tool Categories
-
-| App | Tools | Read Scope | Write Scope | Operations |
-|-----|-------|-----------|-------------|------------|
-| **Notes** | 7 | `notes:read` | `notes:write` | Create, read, update, delete, search notes |
-| **Calendar** | 20+ | `calendar:read` `todo:read`  | `calendar:write` `todo:write`   | Events, todos (tasks), calendars, recurring events, attendees |
-| **Contacts** | 8 | `contacts:read` | `contacts:write` | Create, read, update, delete contacts and address books |
-| **Files (WebDAV)** | 12 | `files:read` | `files:write` | List, read, upload, delete, move files; **OCR/document processing** |
-| **Deck** | 15 | `deck:read` | `deck:write` | Boards, stacks, cards, labels, assignments |
-| **Cookbook** | 13 | `cookbook:read` | `cookbook:write` | Recipes, import from URLs, search, categories |
-| **Tables** | 5 | `tables:read` | `tables:write` | Row operations on Nextcloud Tables |
-| **Sharing** | 10+ | `sharing:read` | `sharing:write` | Create, manage, delete shares |
-
-#### Document Processing (Optional)
-
-The WebDAV file reading tool (`nc_webdav_read_file`) supports **automatic text extraction** from documents and images:
-
-**Supported Formats:**
- **Documents**: PDF, DOCX, PPTX, XLSX, RTF, ODT, EPUB
- **Images**: PNG, JPEG, TIFF, BMP (with OCR)
- **Email**: EML, MSG files
-
-**Features:**
- **Progress Notifications**: Long-running OCR operations (up to 120s) send progress updates every 10 seconds to prevent client timeouts
- **Pluggable Architecture**: Multiple processor backends (Unstructured.io, Tesseract, custom HTTP APIs)
- **Automatic Detection**: Files are processed based on MIME type
- **Graceful Fallback**: Returns base64-encoded content if processing fails
-
-**Configuration:**
-```dotenv
-# Enable document processing (optional)
-ENABLE_DOCUMENT_PROCESSING=true
-
-# Unstructured.io processor (cloud/API-based, supports many formats)
-ENABLE_UNSTRUCTURED=true
-UNSTRUCTURED_API_URL=http://localhost:8002
-UNSTRUCTURED_STRATEGY=auto  # auto, fast, or hi_res
-UNSTRUCTURED_LANGUAGES=eng,deu
-PROGRESS_INTERVAL=10  # Progress update interval in seconds
-
-# Tesseract processor (local OCR, images only)
-ENABLE_TESSERACT=false
-TESSERACT_LANG=eng
-
-# Custom HTTP processor
-ENABLE_CUSTOM_PROCESSOR=false
-CUSTOM_PROCESSOR_URL=http://localhost:9000/process
-CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg
-```
-
-**Example Usage:**
-```
-AI: "Read the contents of Documents/report.pdf"
-→ Uses nc_webdav_read_file tool with automatic OCR processing
-→ Returns extracted text with parsing metadata
-→ Sends progress updates during long operations
-```
-
-See [env.sample](env.sample) for complete configuration options.
-
-**Example Tools:**
- `nc_notes_create_note` - Create a new note
- `nc_cookbook_import_recipe` - Import recipes from URLs with schema.org metadata
- `deck_create_card` - Create a Deck card
- `nc_calendar_create_event` - Create a calendar event
- `nc_calendar_create_todo` - Create a CalDAV task/todo
- `nc_contacts_create_contact` - Create a contact
- `nc_webdav_upload_file` - Upload a file to Nextcloud
- And 80+ more...
-
-> [!TIP]
-> **OAuth Scope Filtering**: When connecting via OAuth, MCP clients will only see tools for which you've granted access. For example, granting only `notes:read` and `notes:write` will show 7 Notes tools instead of all 90+ tools. See [OAuth Scopes Documentation](docs/oauth-architecture.md#oauth-scopes) for the complete scope reference, or [OAuth Troubleshooting - Limited Scopes](docs/oauth-troubleshooting.md#limited-scopes---only-seeing-notes-tools) if you're only seeing a subset of tools.
->
-> **Known Issue**: Claude Code and some other MCP clients may only request/grant Notes scopes during initial connection. Track progress at [#234](https://github.com/cbcoutinho/nextcloud-mcp-server/issues/234).
-
-### Resources
-Resources provide read-only access to Nextcloud data:
- `nc://capabilities` - Server capabilities
- `cookbook://version` - Cookbook app version info
- `nc://Deck/boards/{board_id}` - Deck board data
- `notes://settings` - Notes app settings
- And more...
-
-Run `uv run nextcloud-mcp-server --help` to see all available options.
+- **[Comparison with Context Agent](docs/comparison-context-agent.md)** - When to use each approach

 ## Examples

@@ -289,45 +164,31 @@ AI: "Create a note called 'Meeting Notes' with today's agenda"
 → Uses nc_notes_create_note tool
 ```

-### Manage Recipes
+### Import Recipes
 ```
-AI: "Import the recipe from this URL: https://www.example.com/recipe/chocolate-cake"
-→ Uses nc_cookbook_import_recipe tool to extract schema.org metadata
+AI: "Import the recipe from https://www.example.com/recipe/chocolate-cake"
+→ Uses nc_cookbook_import_recipe tool with schema.org metadata extraction
 ```

-### Manage Calendar
+### Schedule Meetings
 ```
 AI: "Schedule a team meeting for next Tuesday at 2pm"
 → Uses nc_calendar_create_event tool
 ```

-### Organize Files
+### Manage Files
 ```
 AI: "Create a folder called 'Project X' and move all PDFs there"
-→ Uses WebDAV tools (nc_webdav_create_directory, nc_webdav_move)
+→ Uses nc_webdav_create_directory and nc_webdav_move tools
 ```

-### Project Management
+### Semantic Search (Experimental, Opt-in)
 ```
-AI: "Create a new Deck board for Q1 planning with Todo, In Progress, and Done stacks"
-→ Uses deck_create_board and deck_create_stack tools
+AI: "Find notes related to machine learning concepts"
+→ Uses nc_semantic_search to find semantically similar notes (requires Qdrant + Ollama setup)
 ```

-## Transport Protocols
-
-The server supports multiple MCP transport protocols:
-
- **streamable-http** (recommended) - Modern streaming protocol
- **sse** (default, deprecated) - Server-Sent Events for backward compatibility
- **http** - Standard HTTP protocol
-
-```bash
-# Use streamable-http (recommended)
-uv run nextcloud-mcp-server --transport streamable-http
-```
-
-> [!WARNING]
-> SSE transport is deprecated and will be removed in a future MCP specification version. Please migrate to `streamable-http`.
+**Note:** For AI-generated answers with citations, use `nc_semantic_search_answer` (requires MCP client with sampling support).

 ## Contributing

@@ -335,17 +196,17 @@ Contributions are welcome!

 - Report bugs or request features: [GitHub Issues](https://github.com/cbcoutinho/nextcloud-mcp-server/issues)
 - Submit improvements: [Pull Requests](https://github.com/cbcoutinho/nextcloud-mcp-server/pulls)
- Read [CLAUDE.md](CLAUDE.md) for development guidelines
+- Development guidelines: [CLAUDE.md](CLAUDE.md)

 ## Security

 [![MseeP.ai Security Assessment](https://mseep.net/pr/cbcoutinho-nextcloud-mcp-server-badge.png)](https://mseep.ai/app/cbcoutinho-nextcloud-mcp-server)

 This project takes security seriously:
- OAuth2/OIDC support (experimental - requires upstream patches)
- Basic Auth with app-specific passwords (recommended)
- No credential storage with OAuth mode
+- Production-ready Basic Auth with app-specific passwords
+- OAuth2/OIDC support (experimental, requires upstream patches)
 - Per-user access tokens
+- No credential storage in OAuth mode
 - Regular security assessments

 Found a security issue? Please report it privately to the maintainers.
@@ -363,3 +224,4 @@ This project is licensed under the AGPL-3.0 License. See [LICENSE](./LICENSE) fo
 - [Model Context Protocol](https://github.com/modelcontextprotocol)
 - [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
 - [Nextcloud](https://nextcloud.com/)
+```
@@ -2,4 +2,30 @@

 set -euox pipefail

-php /var/www/html/occ app:enable notes
+echo "Installing and configuring notes app for testing..."
+
+# Check if development notes app is mounted at /opt/apps/notes
+if [ -d /opt/apps/notes ]; then
+    echo "Development notes app found at /opt/apps/notes"
+
+    # Remove any existing notes app in apps (from app store or old symlink)
+    if [ -e /var/www/html/custom_apps/notes ]; then
+        echo "Removing existing notes in apps..."
+        rm -rf /var/www/html/custom_apps/notes
+    fi
+
+    # Create symlink from apps to the mounted development version
+    # Per Nextcloud docs: apps outside server root need symlinks in server root
+    echo "Creating symlink: custom_apps/notes -> /opt/apps/notes"
+    ln -sf /opt/apps/notes /var/www/html/custom_apps/notes
+
+    echo "Enabling notes app from /opt/apps (development mode via symlink)"
+    php /var/www/html/occ app:enable notes
+elif [ -d /var/www/html/custom_apps/notes ]; then
+    echo "notes app directory found in apps (already installed)"
+    php /var/www/html/occ app:enable notes
+else
+    echo "notes app not found, installing from app store..."
+    php /var/www/html/occ app:install notes
+    php /var/www/html/occ app:enable notes
+fi
@@ -35,5 +35,6 @@ php /var/www/html/occ config:app:set oidc dynamic_client_registration --value='t
 php /var/www/html/occ config:app:set oidc proof_key_for_code_exchange --value=true --type=boolean
 php /var/www/html/occ config:app:set oidc allow_user_settings --value='enabled'
 php /var/www/html/occ config:app:set oidc default_token_type --value='jwt'
+php /var/www/html/occ config:app:set oidc default_resource_identifier --value='http://localhost:8080'

 echo "OIDC app installed and configured successfully"
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+php /var/www/html/occ config:app:set --value false firstrunwizard wizard_enabled
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512">
+  <rect width="512" height="512" rx="80" ry="80" fill="#0082C9"/>
+  <path d="M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z" fill="#fff"/>
+</svg>
@@ -0,0 +1 @@
+charts/
@@ -0,0 +1,9 @@
+dependencies:
+- name: qdrant
+  repository: https://qdrant.github.io/qdrant-helm
+  version: 1.16.0
+- name: ollama
+  repository: https://otwld.github.io/ollama-helm
+  version: 1.35.0
+digest: sha256:da8db198b12ce0252df220fabb297cfe69186edb8e67952c52e05de778189b92
+generated: "2025-11-21T11:09:07.997781541Z"
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.24.0
-appVersion: "0.24.0"
+version: 0.48.0
+appVersion: "0.48.0"
 keywords:
  - nextcloud
  - mcp
@@ -21,3 +21,16 @@ home: https://github.com/cbcoutinho/nextcloud-mcp-server
 sources:
  - https://github.com/cbcoutinho/nextcloud-mcp-server
 icon: https://raw.githubusercontent.com/nextcloud/server/master/core/img/logo/logo.svg
+annotations:
+  # Grafana dashboard support
+  grafana_dashboard: "true"
+  grafana_dashboard_folder: "Nextcloud MCP"
+dependencies:
+  - name: qdrant
+    version: "1.16.0"
+    repository: https://qdrant.github.io/qdrant-helm
+    condition: qdrant.networkMode.deploySubchart
+  - name: ollama
+    version: "1.35.0"
+    repository: https://otwld.github.io/ollama-helm
+    condition: ollama.enabled
@@ -14,8 +14,12 @@ This Helm chart deploys the Nextcloud MCP (Model Context Protocol) Server on a K
 ### Quick Start with Basic Authentication

 ```bash
+# Add the Helm repository
+helm repo add nextcloud-mcp https://cbcoutinho.github.io/nextcloud-mcp-server
+helm repo update
+
 # Install with basic auth (recommended for most users)
-helm install nextcloud-mcp ./helm/nextcloud-mcp-server \
+helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \
  --set nextcloud.host=https://cloud.example.com \
  --set auth.basic.username=myuser \
  --set auth.basic.password=mypassword
@@ -47,7 +51,7 @@ resources:
 Install with your custom values:

 ```bash
-helm install nextcloud-mcp ./helm/nextcloud-mcp-server -f custom-values.yaml
+helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server -f custom-values.yaml
 ```

 ### OAuth Authentication Mode (Experimental)
@@ -202,6 +206,146 @@ The application exposes HTTP health check endpoints:
 | `documentProcessing.unstructured.apiUrl` | Unstructured API URL | `http://unstructured:8000` |
 | `documentProcessing.tesseract.enabled` | Enable Tesseract OCR | `false` |

+#### Vector Search & Semantic Capabilities (Optional)
+
+Enable semantic search capabilities by deploying a vector database (Qdrant) and embedding service (Ollama or OpenAI).
+
+**Vector Sync Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `vectorSync.enabled` | Enable background vector synchronization | `false` |
+| `vectorSync.scanInterval` | Scan interval in seconds | `3600` |
+| `vectorSync.processorWorkers` | Number of concurrent processor workers | `3` |
+| `vectorSync.queueMaxSize` | Maximum queue size for pending documents | `10000` |
+
+**Document Chunking Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `documentChunking.chunkSize` | Number of words per chunk for embedding | `512` |
+| `documentChunking.chunkOverlap` | Number of overlapping words between chunks | `50` |
+
+**Chunking Strategy:**
+- **Small chunks (256-384)**: Better precision for searches, more storage overhead
+- **Medium chunks (512-768)**: Balanced approach (recommended for most use cases)
+- **Large chunks (1024+)**: Better context preservation, less precise matching
+- **Overlap**: Should be 10-20% of chunk size to preserve context across boundaries
+
+**Qdrant Vector Database:**
+
+Qdrant is deployed as a subchart when `qdrant.enabled` is `true`. All configuration values are passed through to the [qdrant/qdrant](https://github.com/qdrant/qdrant-helm) chart.
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `qdrant.enabled` | Deploy Qdrant as a subchart | `false` |
+| `qdrant.replicaCount` | Number of Qdrant replicas | `1` |
+| `qdrant.image.tag` | Qdrant version | `v1.12.5` |
+| `qdrant.apiKey` | Optional API key for authentication | `""` |
+| `qdrant.persistence.size` | Storage size for vector data | `10Gi` |
+| `qdrant.persistence.storageClass` | Storage class | `""` |
+| `qdrant.resources.requests.cpu` | CPU request | `200m` |
+| `qdrant.resources.requests.memory` | Memory request | `512Mi` |
+| `qdrant.resources.limits.cpu` | CPU limit | `1000m` |
+| `qdrant.resources.limits.memory` | Memory limit | `2Gi` |
+
+**Ollama Embedding Service:**
+
+Ollama is deployed as a subchart when `ollama.enabled` is `true`. All configuration values are passed through to the [ollama/ollama](https://github.com/otwld/ollama-helm) chart. Alternatively, set `ollama.url` to use an external Ollama instance.
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `ollama.enabled` | Deploy Ollama as a subchart | `false` |
+| `ollama.url` | External Ollama URL (use with `enabled: false`) | `""` |
+| `ollama.embeddingModel` | Embedding model to use | `nomic-embed-text` |
+| `ollama.verifySsl` | Verify SSL certificates | `true` |
+| `ollama.replicaCount` | Number of Ollama replicas | `1` |
+| `ollama.ollama.models.pull` | Models to pull on startup | `["nomic-embed-text"]` |
+| `ollama.persistentVolume.enabled` | Enable persistent storage | `true` |
+| `ollama.persistentVolume.size` | Storage size for models | `20Gi` |
+| `ollama.resources.requests.cpu` | CPU request | `500m` |
+| `ollama.resources.requests.memory` | Memory request | `1Gi` |
+| `ollama.resources.limits.cpu` | CPU limit | `2000m` |
+| `ollama.resources.limits.memory` | Memory limit | `4Gi` |
+
+**OpenAI Embedding Provider (Alternative):**
+
+Use OpenAI or any OpenAI-compatible API instead of Ollama.
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `openai.enabled` | Enable OpenAI embedding provider | `false` |
+| `openai.apiKey` | OpenAI API key | `""` |
+| `openai.existingSecret` | Use existing secret for API key | `""` |
+| `openai.secretKey` | Key in secret containing API key | `api-key` |
+| `openai.baseUrl` | Custom API endpoint (optional) | `""` |
+
+#### Observability & Monitoring
+
+The chart includes comprehensive observability features including Prometheus metrics, OpenTelemetry tracing, and Grafana dashboards.
+
+**Metrics Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.metrics.enabled` | Enable Prometheus metrics | `true` |
+| `observability.metrics.port` | Metrics port | `9090` |
+| `observability.metrics.path` | Metrics endpoint path | `/metrics` |
+
+**Tracing Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.tracing.enabled` | Enable OpenTelemetry tracing | `false` |
+| `observability.tracing.endpoint` | OTLP collector endpoint | `""` |
+| `observability.tracing.serviceName` | Service name in traces | `nextcloud-mcp-server` |
+| `observability.tracing.samplingRate` | Trace sampling rate (0.0-1.0) | `1.0` |
+
+**Logging Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.logging.format` | Log format (json or text) | `json` |
+| `observability.logging.level` | Log level | `INFO` |
+| `observability.logging.includeTraceContext` | Include trace IDs in logs | `true` |
+
+**ServiceMonitor (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `serviceMonitor.enabled` | Create ServiceMonitor resource | `false` |
+| `serviceMonitor.interval` | Scrape interval | `30s` |
+| `serviceMonitor.scrapeTimeout` | Scrape timeout | `10s` |
+| `serviceMonitor.labels` | Additional labels for ServiceMonitor | `{}` |
+
+**PrometheusRule (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `prometheusRule.enabled` | Create PrometheusRule with alert rules | `false` |
+| `prometheusRule.labels` | Additional labels for PrometheusRule | `{}` |
+
+**Grafana Dashboards:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `dashboards.enabled` | Enable automatic dashboard provisioning | `false` |
+| `dashboards.grafanaFolder` | Grafana folder name for dashboards | `Nextcloud MCP` |
+| `dashboards.labels` | Additional labels for dashboard ConfigMap | `{}` |
+| `dashboards.annotations` | Additional annotations for dashboard ConfigMap | `{}` |
+
+When `dashboards.enabled` is `true`, a ConfigMap with the Grafana dashboard is created with the `grafana_dashboard: "1"` label. This enables automatic discovery by Grafana sidecar containers (commonly used with kube-prometheus-stack).
+
+The dashboard provides comprehensive monitoring including:
+- HTTP request metrics (RED pattern: Rate, Errors, Duration)
+- MCP tool performance and errors
+- Nextcloud API performance by app (notes, calendar, contacts, etc.)
+- OAuth token operations and cache hit rates
+- External dependency health (Nextcloud, Qdrant, Keycloak, Unstructured API)
+- Vector sync processing pipeline (when enabled)
+
+For manual import or more details, see `charts/nextcloud-mcp-server/dashboards/README.md`.
+
 ## Examples

 ### Example 1: Basic Auth with Ingress
@@ -379,18 +523,106 @@ affinity:
          topologyKey: kubernetes.io/hostname
 ```

+### Example 5: Semantic Search with Qdrant and Ollama
+
+Deploy with vector search capabilities using embedded Qdrant and Ollama:
+
+```yaml
+nextcloud:
+  host: https://cloud.example.com
+
+auth:
+  mode: basic
+  basic:
+    username: admin
+    password: secure-password
+
+# Enable vector sync
+vectorSync:
+  enabled: true
+  scanInterval: 1800  # Scan every 30 minutes
+  processorWorkers: 5
+
+# Deploy Qdrant as a subchart
+qdrant:
+  enabled: true
+  persistence:
+    size: 20Gi
+    storageClass: fast-ssd
+  resources:
+    requests:
+      cpu: 500m
+      memory: 1Gi
+    limits:
+      cpu: 2000m
+      memory: 4Gi
+
+# Deploy Ollama as a subchart
+ollama:
+  enabled: true
+  embeddingModel: nomic-embed-text
+  persistentVolume:
+    size: 30Gi
+    storageClass: standard
+  resources:
+    requests:
+      cpu: 1000m
+      memory: 2Gi
+    limits:
+      cpu: 4000m
+      memory: 8Gi
+```
+
+Or use an external Ollama instance:
+
+```yaml
+vectorSync:
+  enabled: true
+
+qdrant:
+  enabled: true
+
+# Use external Ollama instead of deploying subchart
+ollama:
+  enabled: false
+  url: "http://ollama.ai-services.svc.cluster.local:11434"
+  embeddingModel: nomic-embed-text
+```
+
+Or use OpenAI for embeddings:
+
+```yaml
+vectorSync:
+  enabled: true
+
+qdrant:
+  enabled: true
+
+# Use OpenAI instead of Ollama
+openai:
+  enabled: true
+  apiKey: "sk-..."
+  # Or use existing secret:
+  # existingSecret: openai-api-key
+  # secretKey: api-key
+```
+
 ## Upgrading

 ### To upgrade an existing deployment:

 ```bash
-helm upgrade nextcloud-mcp ./helm/nextcloud-mcp-server -f custom-values.yaml
+# Update the repository
+helm repo update
+
+# Upgrade with your custom values
+helm upgrade nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server -f custom-values.yaml
 ```

 ### To upgrade with new values:

 ```bash
-helm upgrade nextcloud-mcp ./helm/nextcloud-mcp-server \
+helm upgrade nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \
  --set resources.limits.memory=1Gi
 ```

@@ -0,0 +1,161 @@
+# Grafana Dashboards
+
+This directory contains example Grafana dashboards for monitoring the Nextcloud MCP Server.
+
+## Dashboards
+
+### nextcloud-mcp-server.json
+
+All-in-one Operations Dashboard with comprehensive monitoring across all system components.
+
+#### Overview Row
+High-level metrics for quick health assessment:
+- **Request Rate** (stat): Total requests per second
+- **Error Rate** (stat): Percentage of 5xx errors with color thresholds
+- **P95 Latency** (stat): 95th percentile request latency
+- **Active Requests** (stat): Current in-flight requests
+
+#### HTTP Metrics (RED Pattern)
+Core request/error/duration metrics:
+- **Request Rate by Endpoint** (timeseries): RPS breakdown by endpoint
+- **Error Rate by Status Code** (timeseries): Error rates for 4xx/5xx codes
+- **Latency Percentiles** (timeseries): P50, P95, P99 latency trends
+- **Status Code Distribution** (piechart): Percentage breakdown of all status codes
+
+#### MCP Tools Row
+MCP-specific tool performance:
+- **Top Tools by Call Volume** (bargauge): Top 10 most-called tools
+- **Tool Error Rate** (timeseries): Error rates per tool
+- **Tool Execution Duration** (timeseries): P95 latency by tool
+
+#### Nextcloud API Row
+Backend API performance metrics:
+- **API Calls by App** (timeseries): Request rate per Nextcloud app (notes, calendar, contacts, etc.)
+- **API Latency by App** (timeseries): P95 latency per app
+- **API Retries by Reason** (timeseries): Retry patterns (429, timeout, connection errors)
+- **API Error Rate** (stat): Overall API error percentage
+
+#### OAuth & Authentication Row
+OAuth token operations and caching:
+- **Token Validations** (timeseries): Success/failure rates for token validation
+- **Token Exchange Operations** (timeseries): RFC 8693 token exchange operations
+- **Token Cache Hit Rate** (stat): Percentage of cache hits (color-coded: red<50%, yellow<80%, green≥80%)
+- **Refresh Token Operations** (timeseries): Refresh token storage operations by type
+
+#### Dependencies & Health Row
+External dependency status monitoring:
+- **Nextcloud Health** (stat): UP/DOWN status with color coding
+- **Qdrant Health** (stat): Vector database health status
+- **Keycloak Health** (stat): Identity provider health status
+- **Unstructured API Health** (stat): Document processing API status
+- **Health Check Duration** (timeseries): Health check latency by dependency
+- **Database Operation Latency** (timeseries): P95 latency for DB operations (SQLite, Qdrant)
+
+#### Vector Sync Row (when enabled)
+Document processing pipeline metrics:
+- **Documents Processed Rate** (timeseries): Processing throughput by status (success/failure)
+- **Processing Queue Depth** (gauge): Current queue size with thresholds (yellow>50, red>100)
+- **Qdrant Operations** (timeseries): Vector database operations by type
+- **Document Processing Duration** (timeseries): P95 processing latency
+
+## Importing to Grafana
+
+### Manual Import
+
+1. Open Grafana UI
+2. Navigate to Dashboards → Import
+3. Upload `nextcloud-mcp-server.json`
+4. Select your Prometheus data source
+5. Click "Import"
+
+### Automated Import (Helm Chart)
+
+The Helm chart now supports automatic dashboard provisioning via Grafana sidecar pattern.
+
+#### Option 1: Using Helm Chart (Recommended)
+
+Enable dashboard provisioning in your Helm values:
+
+```yaml
+# values.yaml for nextcloud-mcp-server chart
+dashboards:
+  enabled: true
+  grafanaFolder: "Nextcloud MCP"  # Folder name in Grafana
+  labels: {}  # Additional labels if needed
+```
+
+Then deploy or upgrade:
+
+```bash
+helm upgrade --install nextcloud-mcp nextcloud-mcp-server \
+  --set dashboards.enabled=true
+```
+
+The dashboard will be automatically imported by Grafana if the sidecar is configured
+to watch for ConfigMaps with label `grafana_dashboard: "1"`.
+
+#### Option 2: Using kube-prometheus-stack
+
+If using kube-prometheus-stack with Grafana sidecar enabled, the dashboard will be
+automatically discovered and imported. Ensure your Grafana deployment has:
+
+```yaml
+# kube-prometheus-stack values
+grafana:
+  sidecar:
+    dashboards:
+      enabled: true
+      label: grafana_dashboard
+      folder: /tmp/dashboards
+      provider:
+        foldersFromFilesStructure: true
+```
+
+#### Option 3: Manual ConfigMap Creation
+
+For other Grafana setups, create a ConfigMap manually:
+
+```bash
+kubectl create configmap nextcloud-mcp-dashboard \
+  --from-file=nextcloud-mcp-server.json \
+  -n monitoring
+
+# Add sidecar discovery label
+kubectl label configmap nextcloud-mcp-dashboard \
+  grafana_dashboard=1 \
+  -n monitoring
+
+# Add folder annotation (annotations support spaces, unlike labels)
+kubectl annotate configmap nextcloud-mcp-dashboard \
+  grafana_folder="Nextcloud MCP" \
+  -n monitoring
+```
+
+## Dashboard Variables
+
+The dashboard includes four template variables for dynamic filtering:
+
+- **datasource**: Select your Prometheus data source
+- **namespace**: Filter metrics by Kubernetes namespace (supports "All")
+- **pod**: Filter by specific pod(s) - multi-select enabled (supports "All")
+- **interval**: Query interval for rate calculations (1m, 5m, 10m, 30m, 1h - default: 5m)
+
+## Customization
+
+You can customize the dashboard by:
+
+1. Adjusting refresh rate (default: 30s)
+2. Modifying time range (default: last 6 hours)
+3. Adding new panels for specific metrics
+4. Adjusting thresholds in existing panels
+
+## Metrics Reference
+
+All metrics are documented in `/docs/observability.md`. Key metric prefixes:
+
+- `mcp_http_*` - HTTP server metrics
+- `mcp_tool_*` - MCP tool invocation metrics
+- `mcp_nextcloud_api_*` - Nextcloud API call metrics
+- `mcp_oauth_*` - OAuth token validation metrics
+- `mcp_vector_sync_*` - Vector database sync metrics
+- `mcp_db_*` - Database operation metrics
@@ -69,6 +69,57 @@ Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentic
   {{- end }}
 {{- end }}

+{{- if .Values.vectorSync.enabled }}
+
+5. Vector Search & Semantic Capabilities:
+   - Vector Sync: Enabled
+   - Scan Interval: {{ .Values.vectorSync.scanInterval }}s
+   - Processor Workers: {{ .Values.vectorSync.processorWorkers }}
+   {{- if .Values.qdrant.enabled }}
+   - Qdrant: Deployed as subchart ({{ .Release.Name }}-qdrant:6333)
+   {{- else }}
+   - Qdrant: Not deployed (configure external instance)
+   {{- end }}
+   {{- if .Values.ollama.enabled }}
+   - Ollama: Deployed as subchart ({{ .Release.Name }}-ollama:11434)
+   - Embedding Model: {{ .Values.ollama.embeddingModel }}
+   {{- else if .Values.ollama.url }}
+   - Ollama: Using external instance at {{ .Values.ollama.url }}
+   - Embedding Model: {{ .Values.ollama.embeddingModel }}
+   {{- else if .Values.openai.enabled }}
+   - OpenAI: Enabled for embeddings
+   {{- else }}
+   - WARNING: No embedding provider configured (Ollama or OpenAI required)
+   {{- end }}
+
+   Check vector sync status:
+   kubectl --namespace {{ .Release.Namespace }} exec -it deploy/{{ include "nextcloud-mcp-server.fullname" . }} -- curl -s http://localhost:{{ include "nextcloud-mcp-server.port" . }}/user/page | grep "Vector Sync"
+{{- end }}
+
+{{- if .Values.dashboards.enabled }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Enabled
+   - ConfigMap: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+   - Grafana Folder: {{ .Values.dashboards.grafanaFolder }}
+
+   The dashboard will be automatically imported by Grafana if the sidecar is configured
+   to watch for ConfigMaps with label "grafana_dashboard: 1".
+
+   To manually import the dashboard:
+   kubectl --namespace {{ .Release.Namespace }} get configmap {{ include "nextcloud-mcp-server.fullname" . }}-dashboard -o jsonpath='{.data.nextcloud-mcp-server\.json}' | jq . > dashboard.json
+
+   Then import dashboard.json via Grafana UI (Dashboards → Import).
+{{- else }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Disabled
+   - To enable automatic dashboard provisioning, set: dashboards.enabled=true
+
+   Manual import option:
+   The dashboard JSON is available in the chart at charts/nextcloud-mcp-server/dashboards/nextcloud-mcp-server.json
+{{- end }}
+
 For more information and documentation:
 - GitHub: https://github.com/cbcoutinho/nextcloud-mcp-server
 - Documentation: https://github.com/cbcoutinho/nextcloud-mcp-server#readme
@@ -94,6 +94,17 @@ Create the name of the PVC to use for OAuth storage
 {{- end }}
 {{- end }}

+{{/*
+Create the name of the PVC to use for Qdrant local persistent storage
+*/}}
+{{- define "nextcloud-mcp-server.qdrantPvcName" -}}
+{{- if .Values.qdrant.localPersistence.existingClaim }}
+{{- .Values.qdrant.localPersistence.existingClaim }}
+{{- else }}
+{{- include "nextcloud-mcp-server.fullname" . }}-qdrant-data
+{{- end }}
+{{- end }}
+
 {{/*
 Return the MCP server port
 */}}
@@ -0,0 +1,25 @@
+{{- if .Values.dashboards.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+    {{- with .Values.dashboards.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana sidecar discovery label
+    grafana_dashboard: "1"
+  annotations:
+    {{- with .Values.dashboards.annotations }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana folder name (annotations support spaces, unlike labels)
+    {{- if .Values.dashboards.grafanaFolder }}
+    grafana_folder: {{ .Values.dashboards.grafanaFolder | quote }}
+    {{- end }}
+data:
+  nextcloud-mcp-server.json: |-
+{{ .Files.Get "dashboards/nextcloud-mcp-server.json" | indent 4 }}
+{{- end }}
@@ -5,6 +5,8 @@ metadata:
  labels:
    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
 spec:
+  strategy:
+    type: Recreate
  {{- if not .Values.autoscaling.enabled }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
@@ -56,6 +58,11 @@ spec:
            - name: http
              containerPort: {{ include "nextcloud-mcp-server.port" . }}
              protocol: TCP
+            {{- if .Values.observability.metrics.enabled }}
+            - name: metrics
+              containerPort: {{ .Values.observability.metrics.port }}
+              protocol: TCP
+            {{- end }}
          env:
            # Nextcloud connection
            - name: NEXTCLOUD_HOST
@@ -140,6 +147,90 @@ spec:
              value: {{ .Values.documentProcessing.custom.types | quote }}
            {{- end }}
            {{- end }}
+            # Vector Sync
+            - name: VECTOR_SYNC_ENABLED
+              value: {{ .Values.vectorSync.enabled | quote }}
+            {{- if .Values.vectorSync.enabled }}
+            - name: VECTOR_SYNC_SCAN_INTERVAL
+              value: {{ .Values.vectorSync.scanInterval | quote }}
+            - name: VECTOR_SYNC_PROCESSOR_WORKERS
+              value: {{ .Values.vectorSync.processorWorkers | quote }}
+            - name: VECTOR_SYNC_QUEUE_MAX_SIZE
+              value: {{ .Values.vectorSync.queueMaxSize | quote }}
+            {{- end }}
+            # Document Chunking (always set, used by vector sync processor)
+            - name: DOCUMENT_CHUNK_SIZE
+              value: {{ .Values.documentChunking.chunkSize | quote }}
+            - name: DOCUMENT_CHUNK_OVERLAP
+              value: {{ .Values.documentChunking.chunkOverlap | quote }}
+            # Qdrant Vector Database
+            {{- if eq .Values.qdrant.mode "network" }}
+            # Network mode: Use dedicated Qdrant service
+            {{- if .Values.qdrant.networkMode.deploySubchart }}
+            - name: QDRANT_URL
+              value: "http://{{ .Release.Name }}-qdrant:6333"
+            {{- else if .Values.qdrant.networkMode.externalUrl }}
+            - name: QDRANT_URL
+              value: {{ .Values.qdrant.networkMode.externalUrl | quote }}
+            {{- end }}
+            {{- if or .Values.qdrant.networkMode.apiKey .Values.qdrant.networkMode.existingSecret }}
+            - name: QDRANT_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Values.qdrant.networkMode.existingSecret | default (printf "%s-qdrant" .Release.Name) }}
+                  key: {{ .Values.qdrant.networkMode.secretKey }}
+            {{- end }}
+            {{- else if eq .Values.qdrant.mode "persistent" }}
+            # Persistent local mode: File-based storage
+            - name: QDRANT_LOCATION
+              value: {{ .Values.qdrant.localPersistence.dataPath | quote }}
+            {{- else }}
+            # In-memory mode (default): Ephemeral storage
+            - name: QDRANT_LOCATION
+              value: ":memory:"
+            {{- end }}
+            - name: QDRANT_COLLECTION
+              value: {{ .Values.qdrant.collection | quote }}
+            # Ollama Embedding Service
+            {{- if or .Values.ollama.enabled .Values.ollama.url }}
+            - name: OLLAMA_BASE_URL
+              value: {{ .Values.ollama.url | default (printf "http://%s-ollama:11434" .Release.Name) | quote }}
+            - name: OLLAMA_EMBEDDING_MODEL
+              value: {{ .Values.ollama.embeddingModel | quote }}
+            - name: OLLAMA_VERIFY_SSL
+              value: {{ .Values.ollama.verifySsl | quote }}
+            {{- end }}
+            # OpenAI Embedding Provider (alternative to Ollama)
+            {{- if .Values.openai.enabled }}
+            - name: OPENAI_API_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Values.openai.existingSecret | default (printf "%s-openai" (include "nextcloud-mcp-server.fullname" .)) }}
+                  key: {{ .Values.openai.secretKey }}
+            {{- if .Values.openai.baseUrl }}
+            - name: OPENAI_BASE_URL
+              value: {{ .Values.openai.baseUrl | quote }}
+            {{- end }}
+            {{- end }}
+            # Observability
+            - name: METRICS_ENABLED
+              value: {{ .Values.observability.metrics.enabled | quote }}
+            - name: METRICS_PORT
+              value: {{ .Values.observability.metrics.port | quote }}
+            {{- if .Values.observability.tracing.enabled }}
+            - name: OTEL_EXPORTER_OTLP_ENDPOINT
+              value: {{ .Values.observability.tracing.endpoint | quote }}
+            - name: OTEL_SERVICE_NAME
+              value: {{ .Values.observability.tracing.serviceName | quote }}
+            - name: OTEL_TRACES_SAMPLER_ARG
+              value: {{ .Values.observability.tracing.samplingRate | quote }}
+            {{- end }}
+            - name: LOG_FORMAT
+              value: {{ .Values.observability.logging.format | quote }}
+            - name: LOG_LEVEL
+              value: {{ .Values.observability.logging.level | quote }}
+            - name: LOG_INCLUDE_TRACE_CONTEXT
+              value: {{ .Values.observability.logging.includeTraceContext | quote }}
            {{- with .Values.extraEnv }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
@@ -160,6 +251,10 @@ spec:
            - name: oauth-storage
              mountPath: /app/.oauth
            {{- end }}
+            {{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled }}
+            - name: qdrant-data
+              mountPath: /app/data
+            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
@@ -171,6 +266,11 @@ spec:
          persistentVolumeClaim:
            claimName: {{ include "nextcloud-mcp-server.oauthPvcName" . }}
        {{- end }}
+        {{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled }}
+        - name: qdrant-data
+          persistentVolumeClaim:
+            claimName: {{ include "nextcloud-mcp-server.qdrantPvcName" . }}
+        {{- end }}
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
@@ -0,0 +1,11 @@
+{{- if and .Values.openai.enabled (not .Values.openai.existingSecret) }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-openai
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+type: Opaque
+data:
+  {{ .Values.openai.secretKey }}: {{ .Values.openai.apiKey | b64enc | quote }}
+{{- end }}
@@ -0,0 +1,92 @@
+{{- if and .Values.observability.metrics.enabled .Values.prometheusRule.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+    {{- with .Values.prometheusRule.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  groups:
+    - name: nextcloud-mcp-server.critical
+      interval: 30s
+      rules:
+        - alert: NextcloudMCPServerDown
+          expr: up{job="{{ include "nextcloud-mcp-server.fullname" . }}"} == 0
+          for: 5m
+          labels:
+            severity: critical
+          annotations:
+            summary: "Nextcloud MCP Server is down"
+            description: "{{ `{{` }} $labels.pod {{ `}}` }} has been down for more than 5 minutes."
+
+        - alert: NextcloudMCPHighErrorRate
+          expr: |
+            sum(rate(mcp_http_requests_total{status_code=~"5..", job="{{ include "nextcloud-mcp-server.fullname" . }}"}[5m]))
+            / sum(rate(mcp_http_requests_total{job="{{ include "nextcloud-mcp-server.fullname" . }}"}[5m])) > 0.05
+          for: 5m
+          labels:
+            severity: critical
+          annotations:
+            summary: "High error rate on Nextcloud MCP Server"
+            description: "Error rate is {{ `{{` }} printf \"%.2f%%\" (mul $value 100) {{ `}}` }} (threshold: 5%)"
+
+        - alert: NextcloudMCPHighLatency
+          expr: |
+            histogram_quantile(0.95,
+              sum(rate(mcp_http_request_duration_seconds_bucket{job="{{ include "nextcloud-mcp-server.fullname" . }}"}[5m])) by (le, endpoint)
+            ) > 1
+          for: 5m
+          labels:
+            severity: critical
+          annotations:
+            summary: "High latency on Nextcloud MCP Server"
+            description: "P95 latency is {{ `{{` }} printf \"%.2fs\" $value {{ `}}` }} on {{ `{{` }} $labels.endpoint {{ `}}` }} (threshold: 1s)"
+
+        - alert: NextcloudMCPDependencyDown
+          expr: mcp_dependency_health{job="{{ include "nextcloud-mcp-server.fullname" . }}"} == 0
+          for: 2m
+          labels:
+            severity: critical
+          annotations:
+            summary: "Nextcloud MCP dependency is down"
+            description: "Dependency {{ `{{` }} $labels.dependency {{ `}}` }} has been down for more than 2 minutes."
+
+    - name: nextcloud-mcp-server.warning
+      interval: 30s
+      rules:
+        - alert: NextcloudMCPTokenValidationErrors
+          expr: |
+            sum(rate(mcp_oauth_token_validations_total{result="error", job="{{ include "nextcloud-mcp-server.fullname" . }}"}[10m]))
+            / sum(rate(mcp_oauth_token_validations_total{job="{{ include "nextcloud-mcp-server.fullname" . }}"}[10m])) > 0.01
+          for: 10m
+          labels:
+            severity: warning
+          annotations:
+            summary: "High token validation error rate"
+            description: "Token validation error rate is {{ `{{` }} printf \"%.2f%%\" (mul $value 100) {{ `}}` }} (threshold: 1%)"
+
+        - alert: NextcloudMCPVectorSyncQueueHigh
+          expr: mcp_vector_sync_queue_size{job="{{ include "nextcloud-mcp-server.fullname" . }}"} > 100
+          for: 15m
+          labels:
+            severity: warning
+          annotations:
+            summary: "Vector sync queue is high"
+            description: "Vector sync queue size is {{ `{{` }} $value {{ `}}` }} (threshold: 100)"
+
+        - alert: NextcloudMCPQdrantSlowQueries
+          expr: |
+            histogram_quantile(0.95,
+              sum(rate(mcp_db_operation_duration_seconds_bucket{db="qdrant", job="{{ include "nextcloud-mcp-server.fullname" . }}"}[10m])) by (le)
+            ) > 0.5
+          for: 10m
+          labels:
+            severity: warning
+          annotations:
+            summary: "Qdrant queries are slow"
+            description: "P95 Qdrant query latency is {{ `{{` }} printf \"%.2fs\" $value {{ `}}` }} (threshold: 0.5s)"
+{{- end }}
@@ -15,3 +15,21 @@ spec:
    requests:
      storage: {{ .Values.auth.oauth.persistence.size }}
 {{- end }}
+---
+{{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled (not .Values.qdrant.localPersistence.existingClaim) }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-qdrant-data
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+spec:
+  accessModes:
+    - {{ .Values.qdrant.localPersistence.accessMode }}
+  {{- if .Values.qdrant.localPersistence.storageClass }}
+  storageClassName: {{ .Values.qdrant.localPersistence.storageClass }}
+  {{- end }}
+  resources:
+    requests:
+      storage: {{ .Values.qdrant.localPersistence.size }}
+{{- end }}
@@ -15,5 +15,11 @@ spec:
      targetPort: http
      protocol: TCP
      name: http
+    {{- if .Values.observability.metrics.enabled }}
+    - port: {{ .Values.observability.metrics.port }}
+      targetPort: metrics
+      protocol: TCP
+      name: metrics
+    {{- end }}
  selector:
    {{- include "nextcloud-mcp-server.selectorLabels" . | nindent 4 }}
@@ -0,0 +1,32 @@
+{{- if and .Values.observability.metrics.enabled .Values.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+    {{- with .Values.serviceMonitor.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  selector:
+    matchLabels:
+      {{- include "nextcloud-mcp-server.selectorLabels" . | nindent 6 }}
+  endpoints:
+    - port: metrics
+      path: {{ .Values.observability.metrics.path }}
+      interval: {{ .Values.serviceMonitor.interval }}
+      scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }}
+      scheme: http
+      relabelings:
+        # Add namespace label
+        - sourceLabels: [__meta_kubernetes_namespace]
+          targetLabel: namespace
+        # Add pod label
+        - sourceLabels: [__meta_kubernetes_pod_name]
+          targetLabel: pod
+        # Add service label
+        - sourceLabels: [__meta_kubernetes_service_name]
+          targetLabel: service
+{{- end }}
@@ -168,6 +168,57 @@ securityContext:
  runAsNonRoot: true
  runAsUser: 1000

+# Observability Configuration
+observability:
+  # Prometheus metrics
+  metrics:
+    enabled: true
+    port: 9090
+    path: /metrics
+
+  # OpenTelemetry tracing
+  tracing:
+    enabled: false
+    endpoint: ""  # e.g., "http://opentelemetry-collector:4317"
+    serviceName: "nextcloud-mcp-server"
+    samplingRate: 1.0
+
+  # Logging configuration
+  logging:
+    format: json  # "json" or "text"
+    level: INFO
+    includeTraceContext: true
+
+# Prometheus ServiceMonitor (requires Prometheus Operator)
+serviceMonitor:
+  enabled: false
+  interval: 30s
+  scrapeTimeout: 10s
+  labels: {}
+  # Additional labels for ServiceMonitor (e.g., for Prometheus selector)
+  # Example: { prometheus: kube-prometheus }
+
+# Prometheus alert rules (requires Prometheus Operator)
+prometheusRule:
+  enabled: false
+  labels: {}
+  # Additional labels for PrometheusRule (e.g., for Prometheus selector)
+  # Example: { prometheus: kube-prometheus }
+
+# Grafana dashboards (requires Grafana with sidecar enabled)
+dashboards:
+  # Enable automatic dashboard provisioning via ConfigMap
+  enabled: false
+  # Grafana folder name where dashboards will be imported
+  # The grafana-sidecar looks for ConfigMaps with label "grafana_dashboard: 1"
+  # and reads the folder name from annotation "grafana_folder" (supports spaces)
+  grafanaFolder: "Nextcloud MCP"
+  # Additional labels for dashboard ConfigMap
+  # These will be added alongside the required "grafana_dashboard: 1" label
+  labels: {}
+  # Additional annotations for dashboard ConfigMap
+  annotations: {}
+
 service:
  type: ClusterIP
  port: 8000
@@ -264,3 +315,151 @@ extraEnvFrom: []
 #     name: my-configmap
 # - secretRef:
 #     name: my-secret
+
+# Vector Sync Configuration
+# Background synchronization of Nextcloud content into vector database for semantic search
+vectorSync:
+  # Enable background vector synchronization
+  enabled: false
+  # Scan interval in seconds (how often to check for changes)
+  scanInterval: 3600
+  # Number of concurrent processor workers
+  processorWorkers: 3
+  # Maximum queue size for documents pending indexing
+  queueMaxSize: 10000
+
+# Document Chunking Configuration
+# Controls how documents are split into chunks before embedding
+# Only relevant when vectorSync.enabled is true
+documentChunking:
+  # Number of words per chunk (default: 512)
+  # Smaller chunks (256-384): Better for precise searches, more chunks to store
+  # Medium chunks (512-768): Balanced approach (recommended for most use cases)
+  # Larger chunks (1024+): Better for context, less precise matching
+  chunkSize: 512
+  # Number of overlapping words between chunks (default: 50)
+  # Recommended: 10-20% of chunkSize for context preservation across boundaries
+  # Must be less than chunkSize
+  chunkOverlap: 50
+
+# Qdrant Vector Database Configuration
+# Three deployment modes available:
+# 1. Local In-Memory: Fast, ephemeral, zero-config (mode: "memory")
+# 2. Local Persistent: File-based, survives restarts (mode: "persistent")
+# 3. Network: Dedicated Qdrant service, production-ready (mode: "network")
+qdrant:
+  # Qdrant mode: "memory", "persistent", or "network"
+  # - memory: In-memory storage (:memory:) - default, zero config, data lost on restart
+  # - persistent: Local file storage - data persists across restarts, suitable for small/medium deployments
+  # - network: Dedicated Qdrant service (see networkMode below)
+  mode: "memory"
+
+  # Collection name for vector data
+  collection: "nextcloud_content"
+
+  # Local persistent mode configuration (only used when mode: "persistent")
+  localPersistence:
+    # Enable persistent volume for local Qdrant data
+    enabled: true
+    # Storage class (leave empty for default)
+    storageClass: ""
+    accessMode: ReadWriteOnce
+    # Size for local Qdrant storage
+    size: 1Gi
+    # Path where Qdrant data is stored (relative to /app/data)
+    # Default: /app/data/qdrant
+    dataPath: "/app/data/qdrant"
+    # Use existing PVC
+    existingClaim: ""
+
+  # Network mode configuration (only used when mode: "network")
+  networkMode:
+    # Deploy Qdrant as a subchart (if true) or use external Qdrant (if false)
+    deploySubchart: false
+    # External Qdrant URL (used when deploySubchart: false)
+    # Example: "http://qdrant.default.svc.cluster.local:6333"
+    externalUrl: ""
+    # Optional API key for Qdrant authentication
+    apiKey: ""
+    # Use existing secret for API key
+    existingSecret: ""
+    secretKey: "api-key"
+
+  # Qdrant subchart configuration (only used when mode: "network" and networkMode.deploySubchart: true)
+  # All values are passed through to the qdrant/qdrant chart.
+  # See https://github.com/qdrant/qdrant-helm for full configuration options.
+  subchart:
+    # Number of Qdrant replicas
+    replicaCount: 1
+    image:
+      # Qdrant version
+      tag: v1.12.5
+    config:
+      cluster:
+        # Enable distributed cluster mode
+        enabled: false
+    # Persistent storage for vector data
+    persistence:
+      size: 10Gi
+      storageClass: ""
+      accessModes:
+        - ReadWriteOnce
+    # Resource limits and requests
+    resources:
+      requests:
+        cpu: 200m
+        memory: 512Mi
+      limits:
+        cpu: 1000m
+        memory: 2Gi
+
+# Ollama Embedding Service
+# Deployed as a subchart when enabled. All values are passed through to the ollama/ollama chart.
+# See https://github.com/otwld/ollama-helm for full configuration options.
+ollama:
+  # Enable Ollama subchart deployment
+  # Set to true to deploy Ollama as a subchart, or false to use an external Ollama instance
+  enabled: false
+  # External Ollama URL (use this if you have Ollama deployed elsewhere)
+  # When set, use enabled: false to prevent deploying the subchart
+  # Example: "http://ollama.default.svc.cluster.local:11434"
+  url: ""
+  # Embedding model to use
+  embeddingModel: "nomic-embed-text"
+  # Verify SSL certificates when connecting to Ollama
+  verifySsl: true
+  # Number of Ollama replicas (only used when subchart is deployed)
+  replicaCount: 1
+  # Ollama configuration (only used when subchart is deployed)
+  ollama:
+    # Models to automatically pull on startup
+    models:
+      pull:
+        - nomic-embed-text
+  # Persistent storage for models (only used when subchart is deployed)
+  persistentVolume:
+    enabled: true
+    size: 20Gi
+    storageClass: ""
+  # Resource limits and requests (only used when subchart is deployed)
+  resources:
+    requests:
+      cpu: 500m
+      memory: 1Gi
+    limits:
+      cpu: 2000m
+      memory: 4Gi
+
+# OpenAI-compatible Embedding Provider
+# Alternative to Ollama for embedding generation. Can be used with OpenAI or any compatible API.
+openai:
+  # Enable OpenAI embedding provider
+  enabled: false
+  # OpenAI API key (only used if existingSecret is not set)
+  apiKey: ""
+  # Name of existing secret containing the API key
+  existingSecret: ""
+  # Key in the secret that contains the API key
+  secretKey: "api-key"
+  # Optional custom API endpoint (e.g., for Azure OpenAI or local compatible services)
+  baseUrl: ""
@@ -3,7 +3,7 @@ services:
  # https://hub.docker.com/_/mariadb
  db:
    # Note: Check the recommend version here: https://docs.nextcloud.com/server/latest/admin_manual/installation/system_requirements.html#server
-    image: docker.io/library/mariadb:lts@sha256:ae6119716edac6998ae85508431b3d2e666530ddf4e94c61a10710caec9b0f71
+    image: docker.io/library/mariadb:lts@sha256:1cac8492bd78b1ec693238dc600be173397efd7b55eabc725abc281dc855b482
    restart: always
    command: --transaction-isolation=READ-COMMITTED
    volumes:
@@ -17,11 +17,11 @@ services:
  # Note: Redis is an external service. You can find more information about the configuration here:
  # https://hub.docker.com/_/redis
  redis:
-    image: docker.io/library/redis:alpine@sha256:28c9c4d7596949a24b183eaaab6455f8e5d55ecbf72d02ff5e2c17fe72671d31
+    image: docker.io/library/redis:alpine@sha256:6cbef353e480a8a6e7f10ec545f13d7d3fa85a212cdcc5ffaf5a1c818b9d3798
    restart: always

  app:
-    image: docker.io/library/nextcloud:32.0.1@sha256:1e4eae55eebe094cae6f9e7b6e0b4bccf4a4fe7b7e6f6f8f57010994b3b2ee42
+    image: docker.io/library/nextcloud:32.0.2@sha256:ac08482d73ffd85d94069ba291bbd5fb39a70ff21502030a2e3e2d89a7246a48
    restart: always
    ports:
      - 0.0.0.0:8080:80
@@ -58,7 +58,7 @@ services:
      - ./tests/fixtures/nginx.conf:/etc/nginx/nginx.conf:ro

  unstructured:
-    image: downloads.unstructured.io/unstructured-io/unstructured-api:latest@sha256:a43ab55898599157fb0e0e097dabb8ecdd1d8e3df1ae5b67c6e15a136b171a6c
+    image: downloads.unstructured.io/unstructured-io/unstructured-api:latest@sha256:54282d3a25f33fd6cf69bc45b3d37770f213593f58b6dfe5e85fe546376b2807
    restart: always
    ports:
      - 127.0.0.1:8002:8000
@@ -69,17 +69,58 @@ services:

  mcp:
    build: .
-    command: ["--transport", "streamable-http"]
    restart: always
+    command: ["--transport", "streamable-http"]
    depends_on:
      app:
        condition: service_healthy
    ports:
      - 127.0.0.1:8000:8000
+      - 127.0.0.1:9090:9090
+    volumes:
+      - mcp-data:/app/data
    environment:
      - NEXTCLOUD_HOST=http://app:80
      - NEXTCLOUD_USERNAME=admin
      - NEXTCLOUD_PASSWORD=admin
+      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080
+
+      # Vector sync configuration (ADR-007)
+      - VECTOR_SYNC_ENABLED=true
+      - VECTOR_SYNC_SCAN_INTERVAL=60
+      - VECTOR_SYNC_PROCESSOR_WORKERS=1
+
+      #- LOG_FORMAT=json
+
+      # Qdrant configuration (three modes):
+      # 1. Network mode: Set QDRANT_URL=http://qdrant:6333 (requires qdrant service)
+      # 2. In-memory mode: Set QDRANT_LOCATION=:memory: (default if nothing set)
+      # 3. Persistent local: Set QDRANT_LOCATION=/app/data/qdrant (stored in mcp-data volume)
+      #- QDRANT_LOCATION=/app/data/qdrant  # In-memory mode used if not set
+      #- QDRANT_URL=http://qdrant:6333  # Uncomment for network mode
+      #- QDRANT_API_KEY=${QDRANT_API_KEY:-my_secret_api_key}  # Only for network mode
+
+      # Observability
+      #- OTEL_SERVICE_NAME=nextcloud-mcp-docker-compose
+      #- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
+
+      # Collection naming: Auto-generated as {deployment-id}-{model-name}
+      # - Deployment ID: OTEL_SERVICE_NAME (if set) or hostname (fallback)
+      # - Model name: OLLAMA_EMBEDDING_MODEL
+      # - Example: "nextcloud-mcp-server-nomic-embed-text"
+      # - Changing models creates new collection (requires re-embedding)
+      # - Set QDRANT_COLLECTION to override auto-generation:
+      #- QDRANT_COLLECTION=nextcloud_content
+
+      # Ollama configuration (optional - uses SimpleEmbeddingProvider if not set)
+      # - OLLAMA_BASE_URL=http://ollama:11434
+      # - OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # Changing this creates new collection
+      # - OLLAMA_VERIFY_SSL=false
+
+      # Document chunking configuration (for vector embeddings)
+      # Tune these based on your embedding model and content type
+      # - DOCUMENT_CHUNK_SIZE=512      # Words per chunk (default: 512)
+      # - DOCUMENT_CHUNK_OVERLAP=50    # Overlapping words (default: 50, recommended: 10-20% of chunk size)

  mcp-oauth:
    build: .
@@ -96,6 +137,7 @@ services:
      # OIDC_CLIENT_ID not set - uses Dynamic Client Registration (DCR)
      - NEXTCLOUD_HOST=http://app:80
      - NEXTCLOUD_MCP_SERVER_URL=http://localhost:8001
+      - NEXTCLOUD_RESOURCE_URI=http://localhost:8080  # ADR-005: Nextcloud resource identifier for audience validation
      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080
      - NEXTCLOUD_OIDC_SCOPES=openid profile email notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write todo:read todo:write

@@ -104,8 +146,9 @@ services:
      - TOKEN_ENCRYPTION_KEY=ESF1BvEQdGYsCluwMx9Cxvw3uh5pFowPH7Rg_nIliyo=
      - TOKEN_STORAGE_DB=/app/data/tokens.db

-      # ADR-004: Use Hybrid Flow (server intercepts OAuth callback)
-      # Set to false to enable Hybrid Flow tests - server stores refresh token and issues MCP codes
+      # ADR-005: Multi-audience mode (default - ENABLE_TOKEN_EXCHANGE=false)
+      # Tokens must contain BOTH MCP and Nextcloud audiences
+      # No token exchange needed - tokens work for both MCP auth and Nextcloud APIs

      # NO admin credentials - using OAuth with Dynamic Client Registration (DCR)
      # Client credentials registered via RFC 7591 and stored in volume
@@ -115,7 +158,7 @@ services:
      - oauth-tokens:/app/data

  keycloak:
-    image: quay.io/keycloak/keycloak:26.4.2@sha256:3617b09bb4b7510a8d8d9b9fc5707399e2d70688dbcc2f8fb013a144829be1b9
+    image: quay.io/keycloak/keycloak:26.4.5@sha256:653852bfdea2be6e958b9e90a976eff1c6de34edd55f2f679bdc48ef16bc528e
    command:
      - "start-dev"
      - "--import-realm"
@@ -152,13 +195,14 @@ services:
      # Provider auto-detected from OIDC_DISCOVERY_URL issuer
      # Using internal Docker hostname for discovery to get consistent issuer
      - OIDC_DISCOVERY_URL=http://keycloak:8080/realms/nextcloud-mcp/.well-known/openid-configuration
-      - OIDC_CLIENT_ID=nextcloud-mcp-server
-      - OIDC_CLIENT_SECRET=mcp-secret-change-in-production
+      - NEXTCLOUD_OIDC_CLIENT_ID=nextcloud-mcp-server
+      - NEXTCLOUD_OIDC_CLIENT_SECRET=mcp-secret-change-in-production
      - OIDC_JWKS_URI=http://keycloak:8080/realms/nextcloud-mcp/protocol/openid-connect/certs

      # Nextcloud API endpoint (for accessing APIs with validated token)
      - NEXTCLOUD_HOST=http://app:80
      - NEXTCLOUD_MCP_SERVER_URL=http://localhost:8002
+      - NEXTCLOUD_RESOURCE_URI=nextcloud  # ADR-005: Keycloak uses client IDs as audiences, not URLs
      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8888/realms/nextcloud-mcp

      # Refresh token storage (ADR-002 Tier 1 & 2)
@@ -166,8 +210,11 @@ services:
      - TOKEN_ENCRYPTION_KEY=ESF1BvEQdGYsCluwMx9Cxvw3uh5pFowPH7Rg_nIliyo=
      - TOKEN_STORAGE_DB=/app/data/tokens.db

-      # Token exchange (RFC 8693) - convert aud:nextcloud-mcp-server → aud:nextcloud
+      # ADR-005: Token exchange mode (RFC 8693)
+      # Exchange MCP tokens (aud: nextcloud-mcp-server) for Nextcloud tokens (aud: http://localhost:8080)
+      # Provides strict audience separation between MCP session and Nextcloud API access
      - ENABLE_TOKEN_EXCHANGE=true
+      - TOKEN_EXCHANGE_CACHE_TTL=300  # Cache exchanged tokens for 5 minutes (default)

      # OAuth scopes (optional - uses defaults if not specified)
      - NEXTCLOUD_OIDC_SCOPES=openid profile email offline_access notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write todo:read todo:write
@@ -177,6 +224,44 @@ services:
      - keycloak-tokens:/app/data
      - keycloak-oauth-storage:/app/.oauth

+  # Smithery stateless deployment mode (ADR-016)
+  # Test with: docker compose --profile smithery up smithery
+  # Then: curl http://localhost:8081/.well-known/mcp-config
+  smithery:
+    build:
+      context: .
+      dockerfile: Dockerfile.smithery
+    restart: always
+    depends_on:
+      app:
+        condition: service_healthy
+    ports:
+      - 127.0.0.1:8081:8081
+    environment:
+      - SMITHERY_DEPLOYMENT=true
+      - VECTOR_SYNC_ENABLED=false
+      - PORT=8081
+    profiles:
+      - smithery
+
+  qdrant:
+    image: qdrant/qdrant:v1.16.0@sha256:1005201498cf927d835383d0f918b17d8c9da7db58550f169f694455e42d78f4
+    restart: always
+    ports:
+      - 127.0.0.1:6333:6333  # REST API
+      - 127.0.0.1:6334:6334  # gRPC (optional)
+    volumes:
+      - qdrant-data:/qdrant/storage
+    environment:
+      - QDRANT__SERVICE__API_KEY=${QDRANT_API_KEY:-my_secret_api_key}
+    healthcheck:
+      test: ["CMD-SHELL", "test -f /qdrant/.qdrant-initialized"]
+      interval: 10s
+      timeout: 5s
+      retries: 10
+    profiles:
+      - qdrant
+
 volumes:
  nextcloud:
  db:
@@ -184,3 +269,5 @@ volumes:
  oauth-tokens:
  keycloak-tokens:
  keycloak-oauth-storage:
+  qdrant-data:
+  mcp-data:
@@ -1,7 +1,9 @@
 # ADR-003: Vector Database and Semantic Search Architecture

 ## Status
-Proposed
+Superseded by ADR-007
+
+**Note**: This ADR was never implemented. The core technical decisions (Qdrant, embeddings, hybrid search) remain valid and are incorporated into ADR-007, which adds user-controlled background job management, task queuing, multi-user scheduling, and web UI integration. See [ADR-007: Background Vector Sync with User-Controlled Job Management](./ADR-007-background-vector-sync-job-management.md) for the implemented architecture.

 ## Context

@@ -0,0 +1,865 @@
+# ADR-006: Progressive Consent via URL Elicitation (SEP-1036)
+
+**Status**: Partially Implemented (Interim Workaround)
+**Date**: 2025-01-05 (Updated: 2025-01-07)
+**Related**: [SEP-1036](https://github.com/modelcontextprotocol/specification/pull/887), ADR-004
+**Depends On**: ADR-005 (token validation)
+
+## Context
+
+### What is Progressive Consent?
+
+**Progressive consent is a mechanism, not a feature**. It describes HOW users grant the MCP server access to Nextcloud resources through OAuth elicitation. The server can operate in two modes:
+
+1. **Pass-through mode (ENABLE_OFFLINE_ACCESS=false)**:
+   - No refresh tokens requested or stored
+   - Server passes through client's access token to Nextcloud
+   - No provisioning tools available
+   - Suitable for stateless, client-driven operations
+
+2. **Offline access mode (ENABLE_OFFLINE_ACCESS=true)**:
+   - Server requests `offline_access` scope and stores refresh tokens
+   - Enables background operations and server-initiated API calls
+   - Provisioning tools available (`provision_nextcloud_access`, `check_logged_in`)
+   - Requires explicit user consent via OAuth Flow 2
+
+**Single-user mode (BasicAuth)** doesn't use progressive consent at all - credentials are directly available.
+
+### Current User Experience Issues
+
+The current offline access provisioning flow (ADR-004) requires users to manually visit OAuth URLs returned by MCP tools. This creates a poor user experience:
+
+1. User calls `provision_nextcloud_access` tool
+2. Tool returns a URL as text in the response
+3. User must manually copy URL and open in browser
+4. No indication when provisioning is complete
+5. User must retry the original operation manually
+
+### SEP-1036: URL Mode Elicitation
+
+The MCP specification now supports **URL mode elicitation** ([SEP-1036](https://github.com/modelcontextprotocol/specification/pull/887)), which enables servers to:
+
+- Request out-of-band user interactions via secure URLs
+- Handle sensitive operations like OAuth flows without exposing credentials to the client
+- Provide progress tracking for async operations
+- Return errors that automatically trigger elicitation flows
+
+**Key benefits for progressive consent**:
+- **Automatic URL Opening**: Client opens URL in browser automatically (with user consent)
+- **Progress Tracking**: Server can notify client when provisioning is complete
+- **Error-Triggered Flows**: Server can return `ElicitationRequired` error to trigger provisioning
+- **Better UX**: User doesn't manually copy/paste URLs
+
+### Current Implementation Limitations
+
+The current progressive consent flow in `nextcloud_mcp_server/server/oauth_tools.py`:
+
+```python
+@mcp.tool(name="provision_nextcloud_access")
+async def tool_provision_access(ctx: Context) -> ProvisioningResult:
+    """Returns OAuth URL as text - user must manually open it."""
+    return ProvisioningResult(
+        success=True,
+        authorization_url=auth_url,  # User must copy this
+        message="Please visit the authorization URL..."
+    )
+```
+
+**Problems**:
+1. Manual URL handling (copy/paste)
+2. No progress tracking
+3. No automatic retry after provisioning
+4. Tool call required just to get URL
+5. No client integration (URL just displayed as text)
+
+## Decision
+
+We will **migrate progressive consent from manual tools to URL mode elicitation**, leveraging SEP-1036 for better user experience and OAuth security.
+
+### New Architecture: Elicitation-Driven Consent
+
+Instead of explicit tools, use **automatic elicitation** triggered by authorization errors:
+
+```
+User → Calls Nextcloud Tool → Server Checks Provisioning
+                                     ↓ Not Provisioned
+                                Error: ElicitationRequired
+                                     ↓
+                          Client Shows Consent UI
+                                     ↓ User Accepts
+                          Client Opens OAuth URL
+                                     ↓
+                          User Completes OAuth
+                                     ↓
+                          Server Sends Progress Update
+                                     ↓
+                      Original Tool Call Auto-Retries
+```
+
+### Mode 1: Elicitation-Required Error (Primary)
+
+When a tool requires provisioning, return an **ElicitationRequired error** (-32000):
+
+```python
+# In any Nextcloud tool decorated with @require_provisioning
+@mcp.tool()
+@require_provisioning  # New decorator
+async def nc_notes_list_notes(ctx: Context):
+    """List notes - auto-triggers provisioning if needed."""
+    # If not provisioned, decorator returns ElicitationRequired error
+    # If provisioned, continues normally
+    client = await get_client(ctx)
+    return await client.notes.list_notes()
+```
+
+**Error response structure**:
+```json
+{
+  "jsonrpc": "2.0",
+  "id": 1,
+  "error": {
+    "code": -32000,
+    "message": "Nextcloud access provisioning required",
+    "data": {
+      "elicitations": [
+        {
+          "mode": "url",
+          "elicitationId": "550e8400-e29b-41d4-a716-446655440000",
+          "url": "https://mcp.example.com/oauth/provision?id=550e8400...",
+          "message": "Grant the MCP server access to your Nextcloud account to continue."
+        }
+      ]
+    }
+  }
+}
+```
+
+**Client behavior**:
+1. Receives error with elicitation
+2. Shows consent UI: "App wants to access Nextcloud. Open authorization page?"
+3. On user acceptance, opens URL in browser
+4. Optionally tracks progress via `elicitation/track`
+5. Auto-retries original tool call when complete
+
+### Mode 2: Explicit Elicitation Request (Fallback)
+
+For clients that don't support error-triggered elicitation, provide explicit tool:
+
+```python
+@mcp.tool(name="request_nextcloud_access")
+async def request_access(ctx: Context) -> ElicitationResponse:
+    """Explicitly request provisioning via elicitation."""
+    # Send elicitation/create request
+    return await create_elicitation(
+        mode="url",
+        url=generate_oauth_url(),
+        message="Grant access to Nextcloud",
+        elicitation_id=generate_id()
+    )
+```
+
+**Note**: This is a fallback for compatibility. Primary flow uses error-triggered elicitation.
+
+## Implementation
+
+### 1. New Decorator: `@require_provisioning`
+
+Replace explicit provisioning checks with a decorator that returns `ElicitationRequired`:
+
+```python
+# nextcloud_mcp_server/auth/provisioning_decorator.py
+
+def require_provisioning(func):
+    """
+    Decorator that ensures user has provisioned Nextcloud access.
+
+    If not provisioned, returns ElicitationRequired error with OAuth URL.
+    Otherwise, proceeds with normal tool execution.
+    """
+    @functools.wraps(func)
+    async def wrapper(ctx: Context, *args, **kwargs):
+        # Extract user ID from token
+        user_id = get_user_id_from_context(ctx)
+
+        # Check if provisioned
+        storage = RefreshTokenStorage.from_env()
+        await storage.initialize()
+
+        if not await storage.has_refresh_token(user_id):
+            # Not provisioned - return ElicitationRequired error
+            elicitation_id = str(uuid.uuid4())
+            oauth_url = await generate_oauth_url_for_provisioning(
+                user_id=user_id,
+                elicitation_id=elicitation_id,
+                ctx=ctx
+            )
+
+            # Store elicitation for tracking
+            await storage.store_elicitation(
+                elicitation_id=elicitation_id,
+                user_id=user_id,
+                status="pending",
+                created_at=datetime.now(timezone.utc)
+            )
+
+            raise McpError(
+                code=ErrorCode.ELICITATION_REQUIRED,  # -32000
+                message="Nextcloud access provisioning required",
+                data={
+                    "elicitations": [
+                        {
+                            "mode": "url",
+                            "elicitationId": elicitation_id,
+                            "url": oauth_url,
+                            "message": (
+                                "Grant the MCP server access to your Nextcloud "
+                                "account to continue. This is a one-time setup."
+                            )
+                        }
+                    ]
+                }
+            )
+
+        # Already provisioned - proceed normally
+        return await func(ctx, *args, **kwargs)
+
+    return wrapper
+```
+
+### 2. Elicitation Tracking Endpoint
+
+Implement `elicitation/track` to provide progress updates:
+
+```python
+# nextcloud_mcp_server/server/elicitation.py
+
+@mcp.request_handler("elicitation/track")
+async def track_elicitation(
+    elicitation_id: str,
+    _meta: dict = None
+) -> dict:
+    """
+    Track progress of an elicitation request.
+
+    Returns when elicitation is complete or times out.
+    """
+    progress_token = _meta.get("progressToken") if _meta else None
+
+    storage = RefreshTokenStorage.from_env()
+    await storage.initialize()
+
+    # Poll for completion (with timeout)
+    timeout = 300  # 5 minutes
+    start_time = datetime.now(timezone.utc)
+
+    while (datetime.now(timezone.utc) - start_time).seconds < timeout:
+        elicitation = await storage.get_elicitation(elicitation_id)
+
+        if not elicitation:
+            raise McpError(
+                code=-32602,  # Invalid params
+                message=f"Unknown elicitation ID: {elicitation_id}"
+            )
+
+        # Send progress notification if token provided
+        if progress_token and elicitation["status"] == "pending":
+            await send_progress_notification(
+                progress_token=progress_token,
+                progress=50,
+                message="Waiting for OAuth authorization..."
+            )
+
+        # Check if complete
+        if elicitation["status"] == "complete":
+            return {"status": "complete"}
+
+        # Check if failed
+        if elicitation["status"] == "failed":
+            return {
+                "status": "failed",
+                "error": elicitation.get("error_message")
+            }
+
+        # Wait before polling again
+        await asyncio.sleep(2)
+
+    # Timeout
+    raise McpError(
+        code=-32000,
+        message="Elicitation timed out - user did not complete authorization"
+    )
+```
+
+### 3. OAuth Callback Updates
+
+Update the OAuth callback to mark elicitations as complete:
+
+```python
+# nextcloud_mcp_server/auth/oauth_routes.py
+
+async def oauth_callback(request: Request) -> Response:
+    """Handle OAuth callback and mark elicitation complete."""
+    code = request.query_params.get("code")
+    state = request.query_params.get("state")
+
+    # Validate and exchange code for tokens
+    tokens = await exchange_authorization_code(code)
+
+    # Store refresh token
+    await storage.store_refresh_token(
+        user_id=user_id,
+        refresh_token=tokens["refresh_token"]
+    )
+
+    # Mark elicitation as complete
+    elicitation_id = request.query_params.get("elicitation_id")
+    if elicitation_id:
+        await storage.update_elicitation(
+            elicitation_id=elicitation_id,
+            status="complete",
+            completed_at=datetime.now(timezone.utc)
+        )
+
+    return Response(
+        content="<h1>Authorization Complete!</h1>"
+        "<p>You can close this window and return to the application.</p>",
+        media_type="text/html"
+    )
+```
+
+### 4. Update All Nextcloud Tools
+
+Add `@require_provisioning` decorator to all Nextcloud tools:
+
+```python
+# nextcloud_mcp_server/server/notes.py
+
+@mcp.tool()
+@require_scopes("notes:read")
+@require_provisioning  # NEW: Auto-triggers provisioning
+async def nc_notes_list_notes(
+    ctx: Context,
+    category: Optional[str] = None
+) -> NotesListResponse:
+    """List all notes - automatically handles provisioning."""
+    client = await get_client(ctx)
+    # Tool logic proceeds only if provisioned
+    notes = await client.notes.list_notes(category=category)
+    return NotesListResponse(results=notes)
+```
+
+### 5. Capability Declaration
+
+Declare URL elicitation support during initialization:
+
+```python
+# nextcloud_mcp_server/app.py
+
+capabilities = {
+    "elicitation": {
+        "url": {}  # Declare URL mode support
+        # Note: We don't support "form" mode (in-band data collection)
+    },
+    # ... other capabilities
+}
+```
+
+### 6. Environment Variables
+
+**Primary control**:
+```bash
+# ENABLE_OFFLINE_ACCESS: Controls whether server requests refresh tokens and enables provisioning tools
+# Default: false (pass-through mode)
+# Set to true to enable offline access mode with Flow 2 provisioning
+ENABLE_OFFLINE_ACCESS=true
+```
+
+**Future variables** (when URL elicitation is implemented):
+```bash
+# ELICITATION_CALLBACK_URL: Base URL for OAuth callbacks with elicitation tracking
+# Default: NEXTCLOUD_MCP_SERVER_URL + /oauth/callback
+ELICITATION_CALLBACK_URL=http://localhost:8000/oauth/callback
+
+# ELICITATION_TIMEOUT_SECONDS: How long to wait for user to complete OAuth
+# Default: 300 (5 minutes)
+ELICITATION_TIMEOUT_SECONDS=300
+```
+
+**Removed variables**:
+```bash
+# ENABLE_PROGRESSIVE_CONSENT - Removed. Progressive consent is a mechanism, not a feature toggle.
+#                               Use ENABLE_OFFLINE_ACCESS to control whether provisioning tools are available.
+# MCP_SERVER_CLIENT_ID - merged into OIDC_CLIENT_ID
+```
+
+## User Experience Comparison
+
+### Before (ADR-004 Manual Tools)
+
+```
+User: "List my notes"
+Assistant: *calls nc_notes_list_notes*
+Server: Error - not provisioned
+Assistant: "You need to provision access first. Let me do that."
+Assistant: *calls provision_nextcloud_access*
+Server: {authorization_url: "https://..."}
+Assistant: "Please visit this URL: https://..."
+User: *copies URL, opens browser, completes OAuth*
+User: "OK, I'm done"
+Assistant: *calls nc_notes_list_notes again*
+Server: Success! [notes...]
+```
+
+**Issues**: 4 interactions, manual URL handling, no automation
+
+### After (ADR-006 Elicitation)
+
+```
+User: "List my notes"
+Assistant: *calls nc_notes_list_notes*
+Server: ElicitationRequired error
+Client: Shows dialog: "Grant access to Nextcloud? [Yes] [No]"
+User: *clicks Yes*
+Client: Opens OAuth URL in browser automatically
+User: *completes OAuth*
+Server: Sends progress notification "Complete!"
+Client: Auto-retries nc_notes_list_notes
+Server: Success! [notes...]
+Assistant: "Here are your notes: ..."
+```
+
+**Benefits**: 1 interaction, automatic URL opening, seamless retry
+
+## Migration Path
+
+### Phase 1: Add Elicitation Support (v0.26.0)
+
+- Implement `@require_provisioning` decorator
+- Add `elicitation/track` endpoint
+- Keep existing tools (`provision_nextcloud_access`) for compatibility
+- Update OAuth callback to track elicitations
+- Add capability declaration
+
+**Breaking changes**: None (additive)
+
+### Phase 2: Update Documentation (v0.27.0)
+
+- Document elicitation-based flow as primary
+- Mark manual tools as deprecated
+- Update examples and guides
+
+**Breaking changes**: None (documentation only)
+
+### Phase 3: Remove Manual Tools (v0.28.0)
+
+- Remove `provision_nextcloud_access` tool
+- Remove `check_provisioning_status` tool (status in error message)
+- Remove `revoke_nextcloud_access` (or keep for explicit revocation?)
+
+**Breaking changes**: Yes (removed tools)
+
+### Phase 4: Optimize (v0.29.0+)
+
+- Add elicitation result caching
+- Implement retry strategies
+- Add metrics and monitoring
+
+## Testing
+
+### Test Cases
+
+1. **First-Time User Flow**
+   ```python
+   @pytest.mark.oauth
+   async def test_elicitation_first_time_user(nc_mcp_oauth_client):
+       """Test that first tool call triggers elicitation."""
+       # User has no provisioning
+       with pytest.raises(McpError) as exc:
+           await nc_mcp_oauth_client.call_tool("nc_notes_list_notes")
+
+       # Should get ElicitationRequired error
+       assert exc.value.code == -32000
+       assert "elicitations" in exc.value.data
+       assert exc.value.data["elicitations"][0]["mode"] == "url"
+
+       # Verify URL is valid OAuth URL
+       url = exc.value.data["elicitations"][0]["url"]
+       assert "oauth" in url
+       assert "elicitationId" in url
+   ```
+
+2. **Progress Tracking**
+   ```python
+   @pytest.mark.oauth
+   async def test_elicitation_progress_tracking(nc_mcp_oauth_client):
+       """Test progress tracking during OAuth flow."""
+       # Trigger elicitation
+       elicitation_id = trigger_elicitation()
+
+       # Start tracking
+       track_task = asyncio.create_task(
+           nc_mcp_oauth_client.track_elicitation(
+               elicitation_id=elicitation_id,
+               progress_token="test-token"
+           )
+       )
+
+       # Simulate OAuth completion
+       await asyncio.sleep(1)
+       await complete_oauth_flow(elicitation_id)
+
+       # Track should complete
+       result = await track_task
+       assert result["status"] == "complete"
+   ```
+
+3. **Auto-Retry After Provisioning**
+   ```python
+   @pytest.mark.oauth
+   async def test_auto_retry_after_provisioning(nc_mcp_oauth_client):
+       """Test that client auto-retries after elicitation."""
+       # Mock client that auto-retries on ElicitationRequired
+       client = AutoRetryMcpClient(nc_mcp_oauth_client)
+
+       # First call triggers elicitation, client handles it, retries
+       result = await client.call_tool_with_elicitation("nc_notes_list_notes")
+
+       # Should succeed after provisioning
+       assert result.success
+       assert "notes" in result.data
+   ```
+
+4. **Timeout Handling**
+   ```python
+   @pytest.mark.oauth
+   async def test_elicitation_timeout(nc_mcp_oauth_client):
+       """Test timeout if user doesn't complete OAuth."""
+       elicitation_id = trigger_elicitation()
+
+       # Track with short timeout
+       with pytest.raises(McpError, match="timed out"):
+           await nc_mcp_oauth_client.track_elicitation(
+               elicitation_id=elicitation_id,
+               timeout=5  # 5 seconds
+           )
+   ```
+
+## Security Considerations
+
+### Out-of-Band OAuth Flow
+
+**Benefit**: OAuth credentials never pass through MCP client
+- User enters credentials directly on IdP page
+- MCP server receives only authorization code
+- Client never sees passwords or refresh tokens
+
+**Threat mitigation**:
+- **Credential theft**: Client can't intercept credentials (out-of-band)
+- **Token exposure**: Client never receives Nextcloud refresh tokens
+- **CSRF**: State parameter validates OAuth callback
+- **URL tampering**: Elicitation ID ties OAuth flow to user session
+
+### Elicitation ID as Security Token
+
+The `elicitationId` serves as a capability token:
+- Cryptographically random (UUID v4)
+- Single-use (invalidated after completion)
+- Time-limited (expires after timeout)
+- User-scoped (tied to user session)
+
+**Validation**:
+```python
+async def validate_elicitation_id(elicitation_id: str, user_id: str) -> bool:
+    """Validate that elicitation belongs to user and is still valid."""
+    elicitation = await storage.get_elicitation(elicitation_id)
+
+    if not elicitation:
+        return False
+
+    # Check ownership
+    if elicitation["user_id"] != user_id:
+        logger.warning(f"Elicitation ID mismatch: {elicitation_id}")
+        return False
+
+    # Check expiry
+    if elicitation["expires_at"] < datetime.now(timezone.utc):
+        return False
+
+    # Check not already used
+    if elicitation["status"] != "pending":
+        return False
+
+    return True
+```
+
+### Progress Tracking Security
+
+**Risk**: Progress token reuse across users
+
+**Mitigation**:
+- Progress tokens tied to elicitation ID
+- Elicitation ID tied to user session
+- Server validates ownership before sending updates
+
+## Consequences
+
+### Positive
+
+1. **Better UX**: Automatic URL opening, no manual copy/paste
+2. **Seamless Flow**: Auto-retry after provisioning
+3. **Progress Feedback**: User knows when OAuth is complete
+4. **Spec Compliance**: Implements SEP-1036 correctly
+5. **Secure by Design**: Out-of-band OAuth prevents credential exposure
+6. **Simpler API**: No explicit provisioning tools needed
+
+### Negative
+
+1. **Client Dependency**: Requires client support for URL elicitation
+2. **Complexity**: More moving parts (elicitation tracking, callbacks)
+3. **Polling**: Progress tracking uses polling (not ideal)
+4. **Breaking Change**: Removes manual provisioning tools (in v0.28.0)
+
+### Neutral
+
+1. **Storage Requirements**: Need to store elicitation state
+2. **Timeout Management**: Must handle long-running OAuth flows
+3. **Fallback Support**: Still need compatibility for older clients
+
+## Alternatives Considered
+
+### 1. Keep Manual Tools Only (Rejected)
+
+**Pros**: Simple, no client changes needed
+**Cons**: Poor UX, doesn't leverage SEP-1036
+
+**Rejection reason**: SEP-1036 provides better UX and security
+
+### 2. Form Mode Elicitation (Rejected)
+
+**Pros**: No browser redirect needed
+**Cons**: Would expose OAuth credentials to client (security violation)
+
+**Rejection reason**: Form mode only for non-sensitive data per SEP-1036
+
+### 3. Hybrid: Both Tools and Elicitation (Considered)
+
+**Pros**: Maximum compatibility, gradual migration
+**Cons**: API duplication, maintenance burden, confusing for users
+
+**Decision**: Support during migration (v0.26-0.27), remove in v0.28
+
+### 4. WebSocket for Progress (Rejected)
+
+**Pros**: Real-time updates instead of polling
+**Cons**: MCP spec uses polling pattern, adds complexity
+
+**Rejection reason**: Follow spec pattern (polling via elicitation/track)
+
+## Interim Implementation: Inline Form Elicitation (Pre-SEP-1036)
+
+**Note**: SEP-1036 (URL mode elicitation) is not yet available in the stable MCP Python SDK. As a temporary workaround, we've implemented a simplified version using the current **inline form elicitation** API.
+
+### What Changed
+
+Instead of waiting for URL mode elicitation, we implemented a `check_logged_in` tool that:
+
+1. Checks if the user has completed Flow 2 (resource provisioning)
+2. If logged in, returns `"yes"`
+3. If not logged in, uses **inline form elicitation** to prompt the user
+
+### Implementation Details
+
+**New Tool**: `check_logged_in`
+
+```python
+# nextcloud_mcp_server/server/oauth_tools.py
+
+class LoginConfirmation(BaseModel):
+    """Schema for login confirmation elicitation."""
+    acknowledged: bool = Field(
+        default=False,
+        description="Check this box after completing login at the provided URL",
+    )
+
+@mcp.tool(name="check_logged_in")
+@require_scopes("openid")
+async def tool_check_logged_in(ctx: Context, user_id: Optional[str] = None) -> str:
+    """Check if user is logged in and elicit login if needed."""
+    # Check if already logged in
+    status = await get_provisioning_status(ctx, user_id)
+    if status.is_provisioned:
+        return "yes"
+
+    # Generate OAuth URL for Flow 2
+    auth_url = generate_oauth_url_for_flow2(...)
+
+    # Use inline form elicitation (current MCP API)
+    result = await ctx.elicit(
+        message=f"Please log in to Nextcloud at the following URL:\n\n{auth_url}\n\nAfter completing the login, check the box below and click OK.",
+        schema=LoginConfirmation,
+    )
+
+    if result.action == "accept":
+        # Verify login succeeded
+        status = await get_provisioning_status(ctx, user_id)
+        return "yes" if status.is_provisioned else "Login not detected"
+    elif result.action == "decline":
+        return "Login declined by user."
+    else:
+        return "Login cancelled by user."
+```
+
+**OAuth Routes** (added to `app.py`):
+
+```python
+# Flow 2 routes for resource provisioning
+routes.append(
+    Route("/oauth/authorize-nextcloud", oauth_authorize_nextcloud, methods=["GET"])
+)
+routes.append(
+    Route("/oauth/callback-nextcloud", oauth_callback_nextcloud, methods=["GET"])
+)
+```
+
+### User Experience
+
+```
+User: *calls check_logged_in tool*
+
+MCP Client: Displays form elicitation
+┌─────────────────────────────────────────────────────────┐
+│ Please log in to Nextcloud at the following URL:      │
+│                                                         │
+│ http://localhost:8000/oauth/authorize-nextcloud?...    │
+│                                                         │
+│ After completing the login, check the box below and    │
+│ click OK.                                               │
+│                                                         │
+│ ☐ Check this box after completing login                │
+│                                                         │
+│ [Accept] [Decline] [Cancel]                            │
+└─────────────────────────────────────────────────────────┘
+
+User: *copies URL, opens in browser, completes OAuth*
+User: *checks box and clicks Accept*
+
+MCP Server: Verifies login and returns "yes"
+```
+
+### Limitations of Interim Approach
+
+1. **Manual URL Handling**: User must manually copy and paste the URL (not clickable)
+2. **No Automatic Browser Opening**: Client doesn't automatically open the URL
+3. **No Progress Tracking**: Can't track OAuth completion status in real-time
+4. **URL in Message Text**: Login URL embedded in plain text message (not as structured field)
+5. **Client-Side Confirmation**: Relies on user clicking "OK" after OAuth (honor system)
+
+### Why Not Use URL Mode Now?
+
+The current stable MCP Python SDK (`main` branch) only supports **inline form elicitation**:
+
+```python
+# Current API (no 'mode' parameter)
+class ElicitRequestParams(RequestParams):
+    message: str
+    requestedSchema: ElicitRequestedSchema
+    # No 'mode', 'url', or 'elicitationId' fields
+```
+
+URL mode elicitation (`mode: "url"`) is only available in the SEP-1036 branch, which has not been merged to `main` yet.
+
+### Migration to URL Mode (When SEP-1036 Lands)
+
+Once SEP-1036 is merged and available in the stable SDK, we will migrate to URL mode elicitation:
+
+**Before (Current Workaround)**:
+```python
+result = await ctx.elicit(
+    message=f"Please log in at: {auth_url}\n\nClick OK after login.",
+    schema=LoginConfirmation,
+)
+```
+
+**After (URL Mode)**:
+```python
+result = await ctx.session.elicit_url(
+    message="Please log in to Nextcloud to authorize this MCP server.",
+    url=auth_url,
+    elicitation_id=elicitation_id,
+)
+```
+
+**Benefits of migration**:
+- Automatic URL opening (with user consent)
+- Clickable URLs in client UI
+- Progress tracking via `elicitation/track`
+- Better security (URL not in message text)
+- Auto-retry support
+
+### Testing
+
+Integration tests validate the current inline form elicitation:
+
+```python
+# tests/server/oauth/test_login_elicitation.py
+
+async def test_check_logged_in_already_authenticated(nc_mcp_oauth_client):
+    """Test immediate 'yes' for authenticated users."""
+    result = await nc_mcp_oauth_client.call_tool("check_logged_in", arguments={})
+    assert "yes" in result.content[0].text.lower()
+
+async def test_check_logged_in_url_format(nc_mcp_oauth_client):
+    """Test that login URL (when needed) contains correct OAuth parameters."""
+    result = await nc_mcp_oauth_client.call_tool("check_logged_in", arguments={})
+    response_text = result.content[0].text
+
+    # If URL present, validate OAuth parameters
+    if "http" in response_text:
+        assert "response_type=code" in response_text
+        assert "client_id=" in response_text
+        assert "redirect_uri=" in response_text
+        assert "openid" in response_text
+```
+
+### Future Work
+
+- **Monitor SEP-1036**: Watch for merge to MCP Python SDK `main` branch
+- **Implement URL Mode**: Once available, migrate `check_logged_in` to use `ctx.session.elicit_url()`
+- **Add Progress Tracking**: Implement `elicitation/track` endpoint for OAuth completion status
+- **Implement Error-Triggered Elicitation**: Use `@require_provisioning` decorator to return `ElicitationRequired` errors
+- **Remove Manual Workaround**: Deprecate inline form approach once URL mode is stable
+
+## References
+
+- [SEP-1036: URL Mode Elicitation](https://github.com/modelcontextprotocol/specification/pull/887)
+- [MCP Elicitation Specification](https://modelcontextprotocol.io/specification/draft/client/elicitation)
+- [ADR-004: Federated Authentication Architecture](./ADR-004-mcp-application-oauth.md)
+- [ADR-005: Token Audience Validation](./ADR-005-token-audience-validation.md)
+- [RFC 8252: OAuth 2.0 for Native Apps](https://datatracker.ietf.org/doc/html/rfc8252)
+
+## Implementation Checklist
+
+### Interim Implementation (Inline Form Elicitation)
+
+- [x] Create `check_logged_in` tool with inline form elicitation
+- [x] Register Flow 2 OAuth routes (`/oauth/authorize-nextcloud`, `/oauth/callback-nextcloud`)
+- [x] Write integration tests for login elicitation flow
+- [x] Update ADR-006 with interim implementation documentation
+- [x] Add `LoginConfirmation` schema for elicitation
+- [ ] Run tests to validate implementation
+
+### Future Work (URL Mode Elicitation - Post SEP-1036)
+
+- [ ] Implement `@require_provisioning` decorator with ElicitationRequired error
+- [ ] Add `elicitation/track` request handler
+- [ ] Update OAuth callback to mark elicitations complete
+- [ ] Add elicitation storage (ID, user, status, timestamps)
+- [ ] Update all Nextcloud tools with `@require_provisioning`
+- [ ] Add URL elicitation capability declaration
+- [ ] Write tests for progress tracking
+- [ ] Update documentation with URL mode examples
+- [ ] Add migration guide for manual tools → elicitation
+- [ ] Migrate `check_logged_in` from inline form to URL mode
+- [ ] Keep manual tools with deprecation warnings (v0.26-0.27)
+- [ ] Remove manual tools (v0.28.0)
+- [ ] Update CHANGELOG.md with migration timeline
@@ -0,0 +1,647 @@
+# ADR-008: MCP Sampling for Multi-App Semantic Search with RAG
+
+**Status**: Proposed
+**Date**: 2025-01-11
+**Depends On**: ADR-007 (Background Vector Sync)
+
+## Context
+
+ADR-007 established a background synchronization architecture that maintains a vector database of Nextcloud content across multiple apps (notes, calendar, deck, files, contacts), enabling semantic search via the `nc_semantic_search` tool. This tool returns a list of relevant documents with excerpts, similarity scores, and metadata—providing the raw materials for answering user questions.
+
+However, users typically don't want a list of documents—they want answers to their questions. When a user asks "What are my project goals?" or "When is my next dentist appointment?", they expect a natural language response that synthesizes information from multiple sources and document types, not a ranked list of excerpts. This is the pattern of Retrieval-Augmented Generation (RAG): retrieve relevant context from all Nextcloud apps, then generate a cohesive answer.
+
+The challenge is: who should generate the answer, and how?
+
+**Option 1: Server-side LLM**
+The MCP server could maintain its own LLM connection (OpenAI API, Ollama, etc.), construct prompts from retrieved documents, and return generated answers directly. This approach has significant drawbacks:
+
+- **Duplicate infrastructure**: MCP clients (like Claude Desktop) already have LLM capabilities. The server would duplicate this with its own LLM integration, API keys, and configuration.
+- **Cost and billing**: The server operator bears LLM costs for all users, creating billing and quota management challenges.
+- **Limited model choice**: Users are locked into whatever LLM the server configures. They cannot choose their preferred model or provider.
+- **Privacy concerns**: User queries and document contents flow through a server-controlled LLM, creating a potential privacy boundary.
+- **Configuration complexity**: Server operators must configure embedding services (for search) AND generation models (for answers), each with different API keys, rate limits, and failure modes.
+
+**Option 2: Return documents, let client generate**
+The server could simply return retrieved documents and rely on the MCP client's existing LLM to generate answers. The user would call `nc_notes_semantic_search`, receive documents, and then the client would include those documents in its context when responding to the user's original question. This approach also has limitations:
+
+- **Context window waste**: The client must include all document content in its context window, even if only small excerpts are relevant. For 5-10 documents, this can consume significant context space.
+- **Inconsistent behavior**: Whether the client synthesizes an answer or just displays documents depends on the client's implementation and the user's conversational style. There's no guaranteed answer generation.
+- **Poor citations**: The client may generate an answer but fail to cite which specific documents were used, making it hard to verify claims.
+- **User confusion**: Users see a tool that returns "search results" rather than "answers", requiring them to explicitly ask for synthesis.
+
+**Option 3: MCP Sampling**
+The Model Context Protocol specification includes a **sampling** capability that allows MCP servers to request LLM completions from their clients. The server constructs a prompt with retrieved context, sends it to the client via `sampling/createMessage`, and the client's LLM generates a response that the server can return as a tool result.
+
+This approach combines the best of both options:
+
+- **No server-side LLM**: The server has no API keys, no LLM configuration, no billing concerns.
+- **User choice**: The MCP client controls which LLM is used (Claude, GPT-4, local Ollama) and who pays for it.
+- **User transparency**: MCP clients SHOULD present sampling requests to users for approval, making it clear when the server is requesting an LLM call.
+- **Consistent citations**: The server constructs a prompt that explicitly includes document references, ensuring generated answers cite sources.
+- **Single tool call**: Users call one tool (`nc_notes_semantic_search_answer`) and receive a complete answer with citations—no multi-turn conversation needed.
+
+The sampling approach shifts responsibility appropriately: the MCP server is responsible for information retrieval and context construction (its expertise), while the MCP client is responsible for LLM access and user preferences (its expertise). This follows the MCP design philosophy of separating concerns between servers (data access) and clients (user interaction).
+
+However, sampling introduces new considerations:
+
+**Client compatibility**: Not all MCP clients implement sampling. The server must gracefully degrade when sampling is unavailable, falling back to returning documents without generated answers.
+
+**Latency**: Sampling adds a full round-trip to the client and back, plus LLM generation time. A typical flow involves: (1) client calls tool, (2) server retrieves documents, (3) server requests sampling from client, (4) client generates answer, (5) server returns answer to client. This can take 2-5 seconds depending on LLM speed, compared to 100-500ms for document retrieval alone.
+
+**User approval**: MCP clients SHOULD prompt users to approve sampling requests, allowing users to review the prompt before sending it to their LLM. This is a privacy and security feature (prevents servers from making arbitrary LLM requests) but adds interaction friction.
+
+**Prompt engineering**: The server must construct effective prompts that guide the LLM to generate useful, well-cited answers. Unlike Option 1 where the server controls the LLM directly, the server has less control over how the prompt is interpreted.
+
+Despite these considerations, MCP sampling provides the most principled solution for RAG-enhanced semantic search. It respects the client-server boundary, avoids duplicate infrastructure, and delivers the user experience users expect from semantic search tools.
+
+This ADR proposes adding a new tool, `nc_semantic_search_answer`, that uses MCP sampling to generate natural language answers from retrieved Nextcloud content across all indexed apps (notes, calendar, deck, files, contacts).
+
+## Decision
+
+We will implement a new MCP tool `nc_semantic_search_answer` that retrieves relevant documents via vector similarity search across all indexed Nextcloud apps and uses MCP sampling to generate natural language answers. The tool will construct a prompt that includes the user's original query and excerpts from retrieved documents (notes, calendar events, deck cards, files, contacts), request an LLM completion via `ctx.session.create_message()`, and return the generated answer along with source citations.
+
+The existing `nc_semantic_search` tool will remain unchanged, providing users with a choice: call the original tool for raw document results, or call the new sampling-enhanced tool for generated answers. This dual-tool approach respects different use cases—some users want to browse documents, others want direct answers.
+
+### API Design
+
+**Tool Signature**:
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search_answer(
+    query: str,
+    ctx: Context,
+    limit: int = 5,
+    score_threshold: float = 0.7,
+    max_answer_tokens: int = 500,
+) -> SamplingSearchResponse
+```
+
+**Parameters**:
+- `query`: The user's natural language question
+- `ctx`: MCP context for session access
+- `limit`: Maximum documents to retrieve (default 5)
+- `score_threshold`: Minimum similarity score 0-1 (default 0.7)
+- `max_answer_tokens`: Maximum tokens for generated answer (default 500)
+
+**Response Model**:
+```python
+class SamplingSearchResponse(BaseResponse):
+    query: str                              # Original user query
+    generated_answer: str                   # LLM-generated answer
+    sources: list[SemanticSearchResult]     # Supporting documents
+    total_found: int                        # Total matching documents
+    search_method: str = "semantic_sampling"
+    model_used: str | None = None           # Model that generated answer
+    stop_reason: str | None = None          # Why generation stopped
+```
+
+The response includes both the generated answer (for direct user consumption) and the source documents (for verification and citation). The `model_used` field records which LLM generated the answer, allowing users to understand which model provided the response.
+
+### Sampling API Usage
+
+The tool uses the MCP Python SDK's `ServerSession.create_message()` API:
+
+```python
+from mcp.types import SamplingMessage, TextContent, ModelPreferences, ModelHint
+
+# Construct prompt with retrieved context
+prompt = (
+    f"{query}\n\n"
+    f"Here are relevant documents from Nextcloud (notes, calendar events, deck cards, files, contacts):\n\n"
+    f"{context}\n\n"
+    f"Based on the documents above, please provide a comprehensive answer. "
+    f"Cite the document numbers when referencing specific information."
+)
+
+# Request LLM completion via MCP sampling
+sampling_result = await ctx.session.create_message(
+    messages=[
+        SamplingMessage(
+            role="user",
+            content=TextContent(type="text", text=prompt),
+        )
+    ],
+    max_tokens=max_answer_tokens,
+    temperature=0.7,
+    model_preferences=ModelPreferences(
+        hints=[ModelHint(name="claude-3-5-sonnet")],
+        intelligencePriority=0.8,
+        speedPriority=0.5,
+    ),
+    include_context="thisServer",
+)
+
+# Extract answer from response
+if sampling_result.content.type == "text":
+    generated_answer = sampling_result.content.text
+```
+
+**Key parameters**:
+- `messages`: Chat-style messages with role ("user" or "assistant") and content
+- `max_tokens`: Limits response length to control costs and latency
+- `temperature`: 0.7 balances creativity with consistency for factual answers
+- `model_preferences`: Hints suggest Claude Sonnet for balanced intelligence/speed
+- `include_context`: "thisServer" includes MCP server context in client's LLM call
+
+The `include_context` parameter is particularly important. When set to "thisServer", the MCP client provides its LLM with context about the server's capabilities, tools, and resources. This allows the LLM to reference the Nextcloud MCP server when generating answers, creating more contextually appropriate responses. For example, the LLM might say "Based on your Nextcloud Notes..." rather than generic phrasing.
+
+### Prompt Construction
+
+The prompt construction follows a structured template:
+
+```
+[User's original query]
+
+Here are relevant documents from Nextcloud (notes, calendar events, deck cards, files, contacts):
+
+[Document 1]
+Type: note
+Title: Project Kickoff Notes
+Category: Work
+Excerpt: The primary goal for Q1 2025 is to improve semantic search...
+Relevance Score: 0.92
+
+[Document 2]
+Type: calendar_event
+Title: Team Planning Meeting
+Location: Conference Room A
+Excerpt: Scheduled for Jan 15 at 2pm. Agenda: Discuss Q1 objectives and timeline...
+Relevance Score: 0.88
+
+[Document 3]
+Type: deck_card
+Title: Implement semantic search
+Labels: feature, high-priority
+Excerpt: This card tracks the semantic search implementation. Due: Jan 30...
+Relevance Score: 0.85
+
+Based on the documents above, please provide a comprehensive answer.
+Cite the document numbers when referencing specific information.
+```
+
+This structure ensures:
+- The user's original query is preserved verbatim
+- Documents are clearly delineated and numbered for citation
+- Metadata (title, category, score) provides context
+- Explicit instruction to cite sources encourages proper attribution
+
+The prompt is intentionally simple and fixed (not configurable). Allowing users to customize the prompt would complicate the API and introduce prompt injection risks. The fixed structure ensures consistent, well-cited answers across all users.
+
+### Fallback Behavior
+
+Sampling may fail for several reasons:
+- Client doesn't support sampling (e.g., MCP Inspector without callbacks)
+- User declines the sampling request
+- Network errors during sampling round-trip
+- LLM generation errors
+
+The tool handles all failures gracefully by falling back to returning documents without a generated answer:
+
+```python
+try:
+    sampling_result = await ctx.session.create_message(...)
+    generated_answer = sampling_result.content.text
+except Exception as e:
+    logger.warning(f"Sampling failed: {e}, returning search results only")
+    generated_answer = (
+        f"[Sampling unavailable: {str(e)}]\n\n"
+        f"Found {total_found} relevant documents. Please review the sources below."
+    )
+```
+
+This ensures the tool always returns useful information—either a generated answer or the underlying documents—rather than failing completely. The user knows sampling was attempted (via the `[Sampling unavailable]` prefix) and can still access the retrieved context.
+
+### No Results Handling
+
+When semantic search finds no relevant documents (all below `score_threshold`), the tool returns a clear message without attempting sampling:
+
+```python
+if not search_response.results:
+    return SamplingSearchResponse(
+        query=query,
+        generated_answer="No relevant documents found in your Nextcloud content for this query.",
+        sources=[],
+        total_found=0,
+        search_method="semantic_sampling",
+        success=True,
+    )
+```
+
+This avoids wasting a sampling call (and user approval) when there's no content to base an answer on.
+
+### User Experience Flow
+
+**Typical successful flow**:
+1. User calls `nc_semantic_search_answer` with query "What are my Q1 2025 objectives?"
+2. Server retrieves 5 relevant documents via vector search (2 notes, 2 calendar events, 1 deck card)
+3. Server constructs prompt with document excerpts showing mixed content types
+4. Server sends `sampling/createMessage` request to client
+5. Client prompts user: "MCP server wants to generate an answer using these documents. Allow?"
+6. User approves (or client auto-approves based on configuration)
+7. Client sends prompt to LLM (Claude, GPT-4, etc.)
+8. LLM generates answer with citations: "Based on Document 1 (note: Project Kickoff), Document 2 (calendar: Team Planning Meeting), and Document 3 (deck card: Implement semantic search)..."
+9. Client returns answer to server
+10. Server returns `SamplingSearchResponse` with answer and sources
+11. User sees complete answer with citations across multiple Nextcloud apps
+
+**Fallback flow** (sampling unavailable):
+1-3. Same as above
+4. Server attempts `ctx.session.create_message()`
+5. Client raises exception: "Sampling not supported"
+6. Server catches exception, logs warning
+7. Server returns `SamplingSearchResponse` with documents and "[Sampling unavailable]" message
+8. User sees raw documents instead of generated answer
+
+**No results flow**:
+1-2. Same as above but no documents match threshold
+3. Server returns `SamplingSearchResponse` with "No relevant documents" message
+4. No sampling attempted (no prompt sent)
+5. User sees clear "not found" message
+
+This three-tier approach (answer → documents → error message) ensures users always receive useful feedback appropriate to the situation.
+
+## Implementation
+
+### Response Model
+
+Add to `nextcloud_mcp_server/models/semantic.py` (new file for semantic search models):
+
+```python
+from pydantic import Field
+
+class SamplingSearchResponse(BaseResponse):
+    """Response from semantic search with LLM-generated answer via MCP sampling.
+
+    This response includes both a generated natural language answer (created by
+    the MCP client's LLM via sampling) and the source documents used to generate
+    that answer. Users can read the answer for quick information and review
+    sources for verification and deeper exploration.
+
+    Attributes:
+        query: The original user query
+        generated_answer: Natural language answer generated by client's LLM
+        sources: List of semantic search results used as context
+        total_found: Total number of matching documents found
+        search_method: Always "semantic_sampling" for this response type
+        model_used: Name of model that generated the answer (e.g., "claude-3-5-sonnet")
+        stop_reason: Why generation stopped ("endTurn", "maxTokens", etc.)
+    """
+
+    query: str = Field(..., description="Original user query")
+    generated_answer: str = Field(
+        ...,
+        description="LLM-generated answer based on retrieved documents"
+    )
+    sources: list[SemanticSearchResult] = Field(
+        default_factory=list,
+        description="Source documents with excerpts and relevance scores"
+    )
+    total_found: int = Field(..., description="Total matching documents")
+    search_method: str = Field(
+        default="semantic_sampling",
+        description="Search method used"
+    )
+    model_used: str | None = Field(
+        default=None,
+        description="Model that generated the answer"
+    )
+    stop_reason: str | None = Field(
+        default=None,
+        description="Reason generation stopped"
+    )
+```
+
+### Tool Implementation
+
+Add to `nextcloud_mcp_server/server/semantic.py` (new file for semantic search tools):
+
+```python
+import logging
+from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
+
+logger = logging.getLogger(__name__)
+
+
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search_answer(
+    query: str,
+    ctx: Context,
+    limit: int = 5,
+    score_threshold: float = 0.7,
+    max_answer_tokens: int = 500,
+) -> SamplingSearchResponse:
+    """
+    Semantic search with LLM-generated answer using MCP sampling.
+
+    Retrieves relevant documents from Nextcloud across all indexed apps (notes,
+    calendar, deck, files, contacts) using vector similarity search, then uses
+    MCP sampling to request the client's LLM to generate a natural language
+    answer based on the retrieved context.
+
+    This tool combines the power of semantic search (finding relevant content
+    across all your Nextcloud apps) with LLM generation (synthesizing that
+    content into coherent answers). The generated answer includes citations
+    to specific documents with their types, allowing users to verify claims
+    and explore sources.
+
+    The LLM generation happens client-side via MCP sampling. The MCP client
+    controls which model is used, who pays for it, and whether to prompt the
+    user for approval. This keeps the server simple (no LLM API keys needed)
+    while giving users full control over their LLM interactions.
+
+    Args:
+        query: Natural language question to answer (e.g., "What are my Q1 objectives?" or "When is my next dentist appointment?")
+        ctx: MCP context for session access
+        limit: Maximum number of documents to retrieve (default: 5)
+        score_threshold: Minimum similarity score 0-1 (default: 0.7)
+        max_answer_tokens: Maximum tokens for generated answer (default: 500)
+
+    Returns:
+        SamplingSearchResponse containing:
+        - generated_answer: Natural language answer with citations
+        - sources: List of documents with excerpts and relevance scores
+        - model_used: Which model generated the answer
+        - stop_reason: Why generation stopped
+
+    Note: Requires MCP client to support sampling. If sampling is unavailable,
+    the tool gracefully degrades to returning documents with an explanation.
+    The client may prompt the user to approve the sampling request.
+
+    Examples:
+        >>> # Query about objectives across multiple apps
+        >>> result = await nc_semantic_search_answer(
+        ...     query="What are my Q1 2025 project goals?",
+        ...     ctx=ctx
+        ... )
+        >>> print(result.generated_answer)
+        "Based on Document 1 (note: Project Kickoff), Document 2 (calendar event:
+        Q1 Planning Meeting), and Document 3 (deck card: Implement semantic search),
+        your main goals are: 1) Improve semantic search accuracy by 20%,
+        2) Deploy new embedding model, 3) Reduce indexing latency..."
+
+        >>> # Query about appointments
+        >>> result = await nc_semantic_search_answer(
+        ...     query="When is my next dentist appointment?",
+        ...     ctx=ctx,
+        ...     limit=10
+        ... )
+        >>> len(result.sources)  # Calendar events and related notes
+        3
+    """
+    # 1. Retrieve relevant documents via existing semantic search
+    search_response = await nc_semantic_search(
+        query=query,
+        ctx=ctx,
+        limit=limit,
+        score_threshold=score_threshold,
+    )
+
+    # 2. Handle no results case - don't waste a sampling call
+    if not search_response.results:
+        logger.debug(f"No documents found for query: {query}")
+        return SamplingSearchResponse(
+            query=query,
+            generated_answer="No relevant documents found in your Nextcloud content for this query.",
+            sources=[],
+            total_found=0,
+            search_method="semantic_sampling",
+            success=True,
+        )
+
+    # 3. Construct context from retrieved documents
+    context_parts = []
+    for idx, result in enumerate(search_response.results, 1):
+        context_parts.append(
+            f"[Document {idx}]\n"
+            f"Title: {result.title}\n"
+            f"Category: {result.category}\n"
+            f"Excerpt: {result.excerpt}\n"
+            f"Relevance Score: {result.score:.2f}\n"
+        )
+
+    context = "\n".join(context_parts)
+
+    # 4. Construct prompt - reuse user's query, add context and instructions
+    prompt = (
+        f"{query}\n\n"
+        f"Here are relevant documents from Nextcloud (notes, calendar events, deck cards, files, contacts):\n\n"
+        f"{context}\n\n"
+        f"Based on the documents above, please provide a comprehensive answer. "
+        f"Cite the document numbers when referencing specific information."
+    )
+
+    logger.debug(
+        f"Requesting sampling for query: {query} "
+        f"({len(search_response.results)} documents retrieved)"
+    )
+
+    # 5. Request LLM completion via MCP sampling
+    try:
+        sampling_result = await ctx.session.create_message(
+            messages=[
+                SamplingMessage(
+                    role="user",
+                    content=TextContent(type="text", text=prompt),
+                )
+            ],
+            max_tokens=max_answer_tokens,
+            temperature=0.7,
+            model_preferences=ModelPreferences(
+                hints=[ModelHint(name="claude-3-5-sonnet")],
+                intelligencePriority=0.8,
+                speedPriority=0.5,
+            ),
+            include_context="thisServer",
+        )
+
+        # 6. Extract answer from sampling response
+        if sampling_result.content.type == "text":
+            generated_answer = sampling_result.content.text
+        else:
+            # Handle non-text responses (shouldn't happen for text prompts)
+            generated_answer = (
+                f"Received non-text response of type: {sampling_result.content.type}"
+            )
+            logger.warning(
+                f"Unexpected content type from sampling: {sampling_result.content.type}"
+            )
+
+        logger.info(
+            f"Sampling successful: model={sampling_result.model}, "
+            f"stop_reason={sampling_result.stopReason}"
+        )
+
+        return SamplingSearchResponse(
+            query=query,
+            generated_answer=generated_answer,
+            sources=search_response.results,
+            total_found=search_response.total_found,
+            search_method="semantic_sampling",
+            model_used=sampling_result.model,
+            stop_reason=sampling_result.stopReason,
+            success=True,
+        )
+
+    except Exception as e:
+        # Fallback: Return documents without generated answer
+        logger.warning(
+            f"Sampling failed ({type(e).__name__}: {e}), "
+            f"returning search results only"
+        )
+
+        return SamplingSearchResponse(
+            query=query,
+            generated_answer=(
+                f"[Sampling unavailable: {str(e)}]\n\n"
+                f"Found {search_response.total_found} relevant documents. "
+                f"Please review the sources below."
+            ),
+            sources=search_response.results,
+            total_found=search_response.total_found,
+            search_method="semantic_sampling_fallback",
+            success=True,
+        )
+```
+
+### Import Updates
+
+Add to top of `nextcloud_mcp_server/server/semantic.py`:
+
+```python
+from mcp.types import ModelHint, ModelPreferences, SamplingMessage, TextContent
+```
+
+Add to `nextcloud_mcp_server/models/semantic.py` exports:
+
+```python
+__all__ = [
+    "SemanticSearchResult",
+    "SemanticSearchResponse",
+    "SamplingSearchResponse",
+]
+```
+
+## Consequences
+
+### Benefits
+
+**Improved User Experience**: Users receive direct answers to questions rather than lists of documents, matching expectations from modern AI interfaces.
+
+**Proper Attribution**: Generated answers include citations to source documents, allowing users to verify claims and explore deeper.
+
+**No Server-Side LLM**: The server has no LLM dependencies, API keys, or billing concerns. All LLM interactions happen client-side.
+
+**User Control**: MCP clients control which model is used and may prompt users to approve sampling requests, maintaining transparency and user agency.
+
+**Graceful Degradation**: The tool works even when sampling is unavailable, falling back to returning documents. Existing clients continue working without changes.
+
+**Consistent Architecture**: Follows MCP's client-server separation: servers provide data access, clients provide user interaction and LLM capabilities.
+
+### Limitations
+
+**Sampling Support Required**: Not all MCP clients implement sampling. Users with basic clients see fallback behavior (documents without answers).
+
+**Added Latency**: Sampling adds 2-5 seconds to tool execution due to client round-trip and LLM generation time. Users must wait longer for answers than for raw search results.
+
+**User Approval Friction**: MCP clients SHOULD prompt users to approve sampling requests. This adds an extra interaction step before answers are generated.
+
+**Limited Prompt Control**: The server cannot fully control how the client's LLM interprets the prompt. Different models may generate different quality answers.
+
+**No Caching**: Each query requires a new sampling call. The server doesn't cache generated answers (clients may cache if they choose).
+
+**Token Costs**: LLM generation consumes tokens from the user's or client's quota. Heavy users may incur costs or hit rate limits.
+
+### Performance Characteristics
+
+**Typical latency**:
+- Document retrieval (vector search): 100-300ms
+- Sampling round-trip (client communication): 50-200ms
+- LLM generation (client-side): 1-4 seconds
+- **Total**: 2-5 seconds end-to-end
+
+**Throughput**: Sampling is fully async. The server can handle multiple concurrent sampling requests (limited by MCP client's concurrency, not server capacity).
+
+**Resource usage**: Minimal server-side. No GPU, no LLM model loading, no large memory requirements. Sampling happens entirely client-side.
+
+### Security Considerations
+
+**Prompt Injection Risk**: If user queries contain adversarial text designed to manipulate LLM behavior, those queries are included verbatim in the sampling prompt. Mitigation: The structured prompt format and explicit instructions ("based on documents above") constrain LLM behavior.
+
+**Data Privacy**: User queries and document excerpts are sent to the client's LLM. For cloud LLMs (OpenAI, Anthropic), this means data leaves the server's control. Mitigation: MCP clients SHOULD present sampling requests to users for approval, making data flows transparent. Users choose their LLM provider.
+
+**Sampling Abuse**: A malicious server could spam sampling requests to drain user quotas. Mitigation: MCP clients control approval and can rate-limit or block sampling from misbehaving servers.
+
+## Alternatives Considered
+
+### Server-Side LLM Integration
+
+**Approach**: Configure the MCP server with OpenAI API key or local Ollama instance. Generate answers server-side.
+
+**Rejected Because**:
+- Duplicates LLM infrastructure that MCP clients already have
+- Creates billing and API key management burden for server operators
+- Locks users into server-configured models
+- Violates MCP's client-server separation principle
+
+### Multi-Turn Conversation Pattern
+
+**Approach**: `nc_notes_semantic_search` returns documents. User asks follow-up question. Client's LLM uses previous tool results as context.
+
+**Rejected Because**:
+- Requires users to know to ask follow-up questions
+- Consumes context window with full document content
+- Inconsistent behavior across clients
+- Poor citation (LLM may not reference which documents it used)
+
+### Pre-Generated Summaries
+
+**Approach**: Generate and cache summaries during indexing. Return summaries instead of excerpts.
+
+**Rejected Because**:
+- Summaries become stale as documents change
+- Summary quality depends on server-side LLM (same problems as server-side generation)
+- Summaries are generic, not tailored to specific queries
+
+### Streaming Responses
+
+**Approach**: Use MCP sampling with streaming to return incremental answer chunks.
+
+**Deferred Because**:
+- MCP sampling streaming support unclear in current specification
+- Adds significant implementation complexity
+- Tool responses in MCP are typically atomic
+- Can be added later without breaking changes
+
+## Related Decisions
+
+**ADR-007**: Background Vector Sync provides the semantic search infrastructure that this ADR enhances with LLM generation.
+
+**ADR-004**: Progressive Consent architecture applies to sampling—users consent to sampling requests via MCP client approval prompts.
+
+## References
+
+- [MCP Specification - Sampling](https://modelcontextprotocol.io/docs/specification/2025-06-18/client/sampling)
+- [MCP Python SDK - ServerSession.create_message](https://github.com/modelcontextprotocol/python-sdk/blob/main/src/mcp/server/session.py#L215)
+- [MCP Python SDK - Sampling Example](https://github.com/modelcontextprotocol/python-sdk/blob/main/examples/snippets/servers/sampling.py)
+- [MCP Types - SamplingMessage](https://github.com/modelcontextprotocol/python-sdk/blob/main/src/mcp/types.py#L1038)
+- [MCP Types - CreateMessageResult](https://github.com/modelcontextprotocol/python-sdk/blob/main/src/mcp/types.py#L1073)
+- [Retrieval-Augmented Generation (RAG) - Lewis et al. 2020](https://arxiv.org/abs/2005.11401)
+
+## Implementation Checklist
+
+- [ ] Create ADR-008 document (this file)
+- [ ] Create `nextcloud_mcp_server/models/semantic.py` for semantic search models
+- [ ] Add `SamplingSearchResponse` model to `nextcloud_mcp_server/models/semantic.py`
+- [ ] Create `nextcloud_mcp_server/server/semantic.py` for semantic search tools
+- [ ] Implement `nc_semantic_search_answer` tool in `nextcloud_mcp_server/server/semantic.py`
+- [ ] Add MCP sampling type imports (`SamplingMessage`, `TextContent`, etc.)
+- [ ] Write unit tests with mocked sampling (`tests/unit/server/test_semantic.py`)
+- [ ] Create integration tests (`tests/integration/test_sampling.py`)
+- [ ] Update `README.md` with new tool documentation in dedicated Semantic Search section
+- [ ] Update `CLAUDE.md` with sampling pattern guidance
+- [ ] Test with MCP client supporting sampling (Claude Desktop, MCP Inspector with callbacks)
+- [ ] Document client requirements and fallback behavior
+- [ ] Update oauth-architecture.md to add semantic:read scope
+- [ ] Create ADR-009 to document semantic:read scope decision
@@ -0,0 +1,268 @@
+# ADR-009: Generic `semantic:read` OAuth Scope for Multi-App Vector Search
+
+**Status**: Proposed
+**Date**: 2025-01-11
+**Depends On**: ADR-007 (Background Vector Sync), ADR-008 (MCP Sampling for Semantic Search)
+
+## Context
+
+ADR-007 established a background vector synchronization architecture that indexes content from multiple Nextcloud apps (notes, calendar events, deck cards, files, contacts) into a unified vector database. ADR-008 introduced semantic search tools (`nc_semantic_search`, `nc_semantic_search_answer`) that query this vector database and use MCP sampling to generate natural language answers.
+
+The question is: **What OAuth scopes should protect semantic search operations?**
+
+### Option 1: App-Specific Scopes
+
+Require users to have scopes for each app they want to search:
+
+```python
+@mcp.tool()
+@require_scopes("notes:read", "calendar:read", "deck:read", "files:read", "contacts:read")
+async def nc_semantic_search(query: str, ctx: Context) -> SemanticSearchResponse:
+    """Search across all indexed apps"""
+```
+
+**Advantages**:
+- Granular control - users explicitly consent to searching each app
+- Aligns with app-specific authorization model
+- Clear security boundary - can only search apps you can access
+
+**Disadvantages**:
+- **Brittle user experience**: If a user grants only `notes:read` but the tool requires all 5 scopes, the tool becomes invisible/unusable
+- **All-or-nothing enforcement**: Can't search notes alone - must grant all scopes or none
+- **Poor progressive consent**: User can't start with notes search and later add calendar
+- **Scope inflation**: Every new app adds another required scope
+- **Mismatched semantics**: User thinks "I want to search my notes" but must grant calendar, deck, files, contacts just to make the tool appear
+
+### Option 2: Single Generic Scope (Chosen)
+
+Introduce a new semantic search-specific scope:
+
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(query: str, ctx: Context) -> SemanticSearchResponse:
+    """Search across all indexed apps"""
+```
+
+**Advantages**:
+- **Simple authorization**: One scope grants semantic search capability
+- **Progressive enablement**: User grants `semantic:read`, searches notes initially, then enables calendar indexing later
+- **Logical grouping**: Semantic search is a cross-app feature, deserving its own scope
+- **Future-proof**: New apps can be added to vector sync without changing OAuth scopes
+- **Matches user mental model**: "I want semantic search" → grant `semantic:read` (not "I want semantic search" → grant 5 unrelated app scopes)
+
+**Considerations**:
+- User could search apps they can't directly access via app-specific tools
+  - **Mitigation**: Dual-phase authorization (Phase 1: scope check passes with `semantic:read`, Phase 2: verify user can access each returned document via app-specific permissions)
+- Less granular than app-specific scopes
+  - **Counterpoint**: Semantic search is inherently cross-app - forcing per-app authorization defeats its purpose
+
+### Option 3: Hybrid Approach (Rejected)
+
+Support both: semantic search works with either `semantic:read` OR all app-specific scopes:
+
+```python
+@mcp.tool()
+@require_scopes("semantic:read", alternative_scopes=["notes:read", "calendar:read", ...])
+async def nc_semantic_search(query: str, ctx: Context) -> SemanticSearchResponse:
+    """Search across all indexed apps"""
+```
+
+**Rejected Because**:
+- Adds complexity to scope validation logic
+- Unclear to users which scopes they should grant
+- Alternative scopes still suffer from all-or-nothing problem
+- No significant benefit over Option 2 with dual-phase authorization
+
+## Decision
+
+We will introduce two new OAuth scopes specifically for semantic search operations:
+
+- **`semantic:read`**: Query vector database, perform semantic search, generate answers
+- **`semantic:write`**: Enable/disable background vector synchronization, manage indexing settings
+
+These scopes are **independent** of app-specific scopes (notes:read, calendar:read, etc.).
+
+### Tool Scope Assignments
+
+**Read Operations**:
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(query: str, ctx: Context, limit: int = 10, score_threshold: float = 0.7) -> SemanticSearchResponse:
+    """Semantic search across all indexed Nextcloud apps"""
+
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search_answer(query: str, ctx: Context, limit: int = 5, max_answer_tokens: int = 500) -> SamplingSearchResponse:
+    """Semantic search with LLM-generated answer via MCP sampling"""
+
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_get_vector_sync_status(ctx: Context) -> VectorSyncStatusResponse:
+    """Get current vector synchronization status (indexed count, pending count, status)"""
+```
+
+**Write Operations**:
+```python
+@mcp.tool()
+@require_scopes("semantic:write")
+async def nc_enable_vector_sync(ctx: Context) -> VectorSyncResponse:
+    """Enable background vector synchronization for this user"""
+
+@mcp.tool()
+@require_scopes("semantic:write")
+async def nc_disable_vector_sync(ctx: Context) -> VectorSyncResponse:
+    """Disable background vector synchronization"""
+```
+
+### Dual-Phase Authorization
+
+To ensure users can only access documents they have permission to view, semantic search implements **dual-phase authorization**:
+
+**Phase 1: Scope Check** (MCP Server)
+- User must have `semantic:read` scope to call semantic search tools
+- This grants permission to query the vector database
+
+**Phase 2: Document Verification** (Per-Result Filtering)
+- For each returned document, verify user has access via app-specific permissions
+- Uses `DocumentVerifier` interface per app:
+  - Notes: Call `/apps/notes/api/v1/notes/{id}` - if 404/403, exclude from results
+  - Calendar: Call `/remote.php/dav/calendars/username/calendar/event.ics` - if 404/403, exclude
+  - Deck: Call `/apps/deck/api/v1.0/boards/{board_id}/stacks/{stack_id}/cards/{card_id}` - if 404/403, exclude
+  - Files: Call `/remote.php/dav/files/username/path` with PROPFIND - if 404/403, exclude
+  - Contacts: Call `/remote.php/dav/addressbooks/username/addressbook/contact.vcf` - if 404/403, exclude
+
+This two-phase approach ensures:
+1. Semantic search is a **distinct capability** (like "global search") requiring explicit consent
+2. Results are **filtered** to only include documents the user can access
+3. No privilege escalation - users can't discover content they shouldn't see
+
+**Implementation**: See ADR-007 Phase 3 (Document Verification) and `DocumentVerifier` interface.
+
+### Scope Discovery
+
+The new scopes will be:
+- **Advertised** via PRM endpoint (`/.well-known/oauth-protected-resource/mcp`)
+- **Dynamically discovered** from `@require_scopes` decorators on semantic search tools
+- **Documented** in OAuth architecture (oauth-architecture.md)
+- **Included** in default client registration scopes
+
+## Consequences
+
+### Benefits
+
+**User Experience**:
+- Simple authorization: one scope for semantic search capability
+- Progressive enablement: grant `semantic:read`, enable indexing for apps later
+- Natural mental model: "semantic search" is a distinct feature deserving its own scope
+
+**Security**:
+- Dual-phase authorization prevents privilege escalation
+- Users explicitly consent to cross-app search capability
+- Per-document verification ensures users only see accessible content
+
+**Maintainability**:
+- Adding new apps to vector sync doesn't require OAuth scope changes
+- Clear separation between app access (notes:read) and search capability (semantic:read)
+- Logical grouping of related operations (search, sync status, enable/disable)
+
+**Future-Proof**:
+- Can add new document types without breaking existing OAuth flows
+- Supports future semantic features (recommendations, clustering) under same scope
+- Aligns with potential future Nextcloud semantic capabilities
+
+### Trade-offs
+
+**Less Granular Than App-Specific Scopes**:
+- User can't grant "semantic search notes only"
+- Semantic search is all-or-nothing across enabled apps
+- **Mitigation**: Dual-phase verification ensures users only see documents they can access
+
+**New Scope to Learn**:
+- Users must understand `semantic:read` is distinct from app scopes
+- MCP clients must present scope clearly during consent
+- **Mitigation**: Clear scope descriptions in OAuth consent UI and documentation
+
+**Backend Complexity**:
+- Requires dual-phase authorization implementation
+- DocumentVerifier interface needed for each app
+- **Benefit**: Enforces proper security regardless of scope model
+
+### Migration Impact
+
+**Breaking Change**: Existing deployments using notes-specific semantic search will break.
+
+**Before (OLD - Breaking)**:
+```python
+@mcp.tool()
+@require_scopes("notes:read")
+async def nc_notes_semantic_search(query: str, ctx: Context) -> SemanticSearchResponse:
+    """Semantic search notes"""
+```
+
+**After (NEW)**:
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(query: str, ctx: Context) -> SemanticSearchResponse:
+    """Semantic search across all apps"""
+```
+
+**Migration Path**:
+1. Deploy server with new `semantic:read` scope
+2. Users re-authenticate, granting `semantic:read` scope
+3. Semantic search tools become visible/usable again
+4. **No data loss**: Vector database and indexed documents remain unchanged
+
+**Backward Compatibility**: None. This is an intentional breaking change to correct the scope model before broader adoption.
+
+## Alternatives Considered
+
+### Keep Notes-Specific Scopes
+
+**Approach**: Continue using `notes:read` for semantic search, even when searching other apps.
+
+**Rejected Because**:
+- Semantically incorrect - searching calendar events is not "reading notes"
+- Confuses users - why does searching calendar require notes:read?
+- Doesn't scale - what scope for multi-app search?
+
+### Create Per-App Semantic Scopes
+
+**Approach**: Introduce `notes:semantic`, `calendar:semantic`, `deck:semantic`, etc.
+
+**Rejected Because**:
+- Scope proliferation - doubles the number of scopes
+- Defeats purpose of unified vector search
+- Users would need to grant 5+ scopes for cross-app search
+- No clear benefit over dual-phase authorization with `semantic:read`
+
+### Require All App Scopes (Already Rejected in Option 1)
+
+**Approach**: Require `notes:read AND calendar:read AND deck:read AND files:read AND contacts:read`
+
+**Rejected Because**: Unusable UX (see Option 1 disadvantages above)
+
+## Related Decisions
+
+**ADR-007**: Background Vector Sync provides the indexing architecture that semantic scopes protect. The DocumentVerifier interface from ADR-007 Phase 3 implements dual-phase authorization.
+
+**ADR-008**: MCP Sampling for semantic search uses `semantic:read` to protect the sampling-enhanced search tool.
+
+**ADR-004**: Progressive Consent architecture supports users granting `semantic:read` initially, then enabling per-app indexing via `semantic:write` (enable_vector_sync with app selection).
+
+## Implementation Checklist
+
+- [ ] Create ADR-009 document (this file)
+- [ ] Update `oauth-architecture.md` to document `semantic:read` and `semantic:write` scopes ✅
+- [ ] Update `README.md` to show Semantic Search as separate tool category ✅
+- [ ] Update ADR-007 to reference `semantic:*` scopes instead of `sync:*` ✅
+- [ ] Update ADR-008 to use `semantic:read` instead of `notes:read` ✅
+- [ ] Implement DocumentVerifier interface for all apps (notes, calendar, deck, files, contacts)
+- [ ] Update semantic search tools to use `@require_scopes("semantic:read")`
+- [ ] Update vector sync tools to use `@require_scopes("semantic:write")`
+- [ ] Add dual-phase authorization to semantic search implementation
+- [ ] Test OAuth flow with `semantic:read` scope
+- [ ] Update scope discovery in PRM endpoint
+- [ ] Document migration path for existing deployments
@@ -0,0 +1,661 @@
+# ADR-010: Webhook-Based Vector Database Synchronization
+
+**Status**: Proposed
+**Date**: 2025-01-10
+**Depends On**: ADR-007 (Background Vector Sync)
+
+## Context
+
+ADR-007 established a background synchronization architecture for maintaining the vector database using periodic polling. The scanner task runs on a configurable interval (default 3600 seconds / 1 hour) to detect changed documents across Nextcloud apps. While this polling approach is simple and reliable, it introduces significant latency between content changes and vector database updates.
+
+### Current Polling Architecture
+
+The existing scanner implementation in `nextcloud_mcp_server/vector/scanner.py` operates as follows:
+
+1. **Periodic Scanning**: The scanner task sleeps for `vector_sync_scan_interval` seconds between runs
+2. **Change Detection**: For each scan, it:
+   - Fetches all documents from Nextcloud (notes, calendar events, etc.)
+   - Queries Qdrant for the last indexed timestamp of each document
+   - Compares modification timestamps to detect changes
+   - Queues changed documents for processing
+3. **Document Processing**: Processor tasks pull from the queue, generate embeddings, and update Qdrant
+
+This architecture works but has fundamental limitations:
+
+**Latency**: With a 1-hour scan interval, content changes can take up to 1 hour to appear in semantic search results. For time-sensitive use cases (e.g., "What's on my calendar today?"), this delay is problematic.
+
+**API Load**: Every scan fetches *all* documents for *all* enabled users, regardless of whether anything changed. For large deployments with thousands of documents, this generates significant unnecessary API traffic to Nextcloud.
+
+**Resource Waste**: The scanner and processors consume compute resources even when no content has changed. During periods of low activity, the system performs wasteful polling.
+
+**Scalability**: As the number of users and documents grows, the time required to complete a full scan increases. Eventually, the scan duration may exceed the scan interval, causing scans to run continuously without idle periods.
+
+**Rate Limiting**: Fetching all documents for all users in rapid succession can trigger Nextcloud's rate limiting, especially on shared hosting environments with restrictive API quotas.
+
+These limitations are inherent to any polling-based architecture. Reducing the scan interval (e.g., to 5 minutes) reduces latency but exacerbates API load, resource waste, and rate limiting issues. The fundamental problem is that the system has no way to know *when* content changes occur—it must repeatedly check to find out.
+
+### Nextcloud Webhook Listeners
+
+Nextcloud provides a webhook_listeners app (bundled with Nextcloud 30+) that enables push-based change notifications. Instead of polling for changes, external services can register webhook endpoints and receive HTTP POST requests when specific events occur. Administrators register these webhooks using Nextcloud's OCS API or occ commands.
+
+The webhook_listeners app supports events for all Nextcloud apps relevant to this MCP server's vector database:
+
+**Files/Notes Events** (notes are stored as files):
+- `OCP\Files\Events\Node\NodeCreatedEvent`
+- `OCP\Files\Events\Node\NodeWrittenEvent`
+- `OCP\Files\Events\Node\BeforeNodeDeletedEvent` ⭐ **Use this for deletion (includes node.id)**
+- `OCP\Files\Events\Node\NodeDeletedEvent` (missing node.id - file already deleted)
+- `OCP\Files\Events\Node\NodeRenamedEvent`
+- `OCP\Files\Events\Node\NodeCopiedEvent`
+
+**Calendar Events**:
+- `OCP\Calendar\Events\CalendarObjectCreatedEvent`
+- `OCP\Calendar\Events\CalendarObjectUpdatedEvent`
+- `OCP\Calendar\Events\CalendarObjectDeletedEvent`
+- `OCP\Calendar\Events\CalendarObjectMovedEvent`
+
+**Tables Events**:
+- `OCA\Tables\Event\RowAddedEvent`
+- `OCA\Tables\Event\RowUpdatedEvent`
+- `OCA\Tables\Event\RowDeletedEvent`
+
+**Deck Events** (via file events since cards are stored as files in some configurations)
+
+Each webhook notification includes rich metadata:
+- User ID who triggered the event
+- Timestamp of the event
+- Document ID and metadata
+- Operation type (create, update, delete)
+- Path information (for files)
+
+Webhook notifications are dispatched via background jobs, with configurable delivery guarantees. Administrators can set up dedicated webhook worker processes to achieve near-real-time delivery (within seconds of the triggering event).
+
+### Why Not Replace Polling Entirely?
+
+While webhooks provide superior latency and efficiency, they cannot fully replace polling:
+
+**Missed Events**: If the MCP server is down when a webhook fires, the notification is lost. Nextcloud's background job system processes webhooks asynchronously, but does not queue failed deliveries indefinitely.
+
+**Administrator Setup**: Webhooks must be registered by Nextcloud administrators using the OCS API or occ commands. This is an optional optimization that administrators can enable when they want to reduce polling frequency.
+
+**Filter Configuration**: Webhook filters must be carefully configured to avoid notification floods. A poorly configured filter could send thousands of notifications for bulk operations (e.g., importing a calendar with hundreds of events).
+
+**Graceful Degradation**: In environments where webhooks are not configured, the system continues using polling without any degradation in functionality.
+
+**Deletion Detection**: Nextcloud's webhook system does not guarantee delivery of deletion events if the user's account is removed or the app is uninstalled. Periodic polling provides a safety mechanism to detect orphaned documents.
+
+A complementary architecture where webhooks supplement (but don't replace) polling provides low-latency updates when configured, with polling ensuring reliability.
+
+### Design Considerations
+
+**Push vs Pull Trade-offs**:
+Webhooks introduce new failure modes (network issues, endpoint unavailability, notification floods) that polling avoids. The webhook endpoint must handle failures gracefully without blocking semantic search functionality.
+
+**Webhook Endpoint Security**:
+The MCP server exposes an HTTP endpoint to receive webhooks. Authentication is optional—in production deployments, administrators can configure Nextcloud to send an `Authorization` header that the MCP server validates. For local development, authentication can be disabled for simplicity.
+
+**Idempotency**:
+The system may receive duplicate notifications (webhook + next scan) or out-of-order notifications (update fires before create completes). Document processing must be idempotent—processing the same document multiple times produces the same result.
+
+**Asynchronous Processing**:
+Nextcloud processes webhooks via background jobs, introducing delivery latency (typically seconds to minutes depending on background job configuration). This affects testing strategies—integration tests cannot rely on immediate webhook delivery.
+
+**Deployment Patterns**:
+The MCP server webhook endpoint is accessible at the same host/port as the MCP server itself. Administrators configure Nextcloud to POST to `https://<mcp-server-host>:<port>/webhooks/nextcloud` when registering webhook listeners.
+
+## Decision
+
+We will add a webhook endpoint to the MCP server that receives change notifications from Nextcloud and queues documents for vector database processing. This complements the existing polling architecture from ADR-007 without replacing it—webhooks provide low-latency updates when configured, while polling ensures reliability regardless of webhook availability.
+
+The architecture is intentionally simple: the webhook endpoint is just another producer of `DocumentTask` objects that feed into the existing processor queue. The scanner task, processor pool, and queue management remain unchanged from ADR-007.
+
+### Architecture Components
+
+**1. Webhook Endpoint**
+
+A new Starlette HTTP route will be added to receive webhook notifications from Nextcloud:
+
+```python
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+@app.route("/webhooks/nextcloud", methods=["POST"])
+async def handle_nextcloud_webhook(request: Request) -> JSONResponse:
+    """
+    Receive webhook notifications from Nextcloud.
+
+    Parses event payload, extracts document metadata, and queues
+    changed documents for processing using the same queue as the scanner.
+    """
+    # 1. Optional authentication validation
+    if settings.webhook_secret:
+        auth_header = request.headers.get("authorization", "")
+        if not auth_header.startswith("Bearer ") or \
+           auth_header[7:] != settings.webhook_secret:
+            logger.warning("Webhook authentication failed")
+            return JSONResponse(
+                {"status": "error", "message": "Unauthorized"},
+                status_code=401
+            )
+
+    # 2. Parse webhook payload
+    payload = await request.json()
+    event_class = payload["event"]["class"]
+    user_id = payload["user"]["uid"]
+
+    # 3. Extract document metadata from event
+    doc_task = extract_document_task(event_class, payload)
+    if not doc_task:
+        return JSONResponse({"status": "ignored", "reason": "unsupported event"})
+
+    # 4. Send to processor queue (same queue as scanner)
+    try:
+        await webhook_send_stream.send(doc_task)
+        logger.info(f"Queued document from webhook: {doc_task}")
+        return JSONResponse({"status": "queued"})
+    except Exception as e:
+        logger.error(f"Failed to queue webhook document: {e}")
+        return JSONResponse(
+            {"status": "error", "message": str(e)},
+            status_code=500
+        )
+```
+
+The endpoint:
+- Validates optional authentication via `Authorization: Bearer <secret>` header
+- Parses various event types (calendar, files, tables) into `DocumentTask` objects
+- Sends to the same processing queue that the scanner uses
+- Returns quickly (<50ms) to avoid blocking Nextcloud's webhook workers
+- Handles errors gracefully (invalid payload, queue full, etc.)
+
+**2. Webhook Registration Helper (Development Only)**
+
+For development and testing purposes, a helper method will be added to `NextcloudClient` for registering webhooks via the OCS API. This is NOT exposed as an MCP tool—administrators register webhooks manually using Nextcloud's admin interface or the OCS API directly.
+
+```python
+class NextcloudClient:
+    async def register_webhook(
+        self,
+        event_type: str,
+        uri: str,
+        http_method: str = "POST",
+        auth_method: str = "none",
+        headers: dict[str, str] | None = None,
+    ) -> dict:
+        """
+        Register a webhook with Nextcloud (requires admin credentials).
+
+        Used for development/testing. Production admins should register
+        webhooks using Nextcloud's admin UI or occ commands.
+        """
+        # Implementation uses OCS API: POST /ocs/v2.php/apps/webhook_listeners/api/v1/webhooks
+        ...
+```
+
+This keeps webhook registration out of the MCP tool surface while providing a convenient API for integration tests.
+
+**3. Event Parsing**
+
+A helper function extracts `DocumentTask` from various Nextcloud event types:
+
+```python
+def extract_document_task(event_class: str, payload: dict) -> DocumentTask | None:
+    """Extract DocumentTask from webhook event payload."""
+    user_id = payload["user"]["uid"]
+    event_data = payload["event"]
+
+    # File/Note events
+    if "NodeCreatedEvent" in event_class or "NodeWrittenEvent" in event_class:
+        # Only process markdown files (notes)
+        path = event_data["node"]["path"]
+        if not path.endswith(".md"):
+            return None
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=event_data["node"]["id"],
+            doc_type="note",
+            operation="index",
+            modified_at=payload["time"],
+        )
+
+    # Calendar events
+    elif "CalendarObjectCreatedEvent" in event_class or \
+         "CalendarObjectUpdatedEvent" in event_class:
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=str(event_data["objectData"]["id"]),
+            doc_type="calendar_event",
+            operation="index",
+            modified_at=event_data["objectData"]["lastmodified"],
+        )
+
+    # Deletion events (use BeforeNodeDeletedEvent for files to get node.id)
+    elif "BeforeNodeDeletedEvent" in event_class or \
+         "NodeDeletedEvent" in event_class or \
+         "CalendarObjectDeletedEvent" in event_class:
+        # Similar logic for delete operations
+        ...
+
+    return None  # Unsupported event type
+```
+
+**4. No Changes to Scanner or Processors**
+
+The existing scanner task from ADR-007 continues operating unchanged. It polls Nextcloud on its configured interval (`VECTOR_SYNC_SCAN_INTERVAL`), discovers changed documents, and queues them for processing. The scanner is unaware of webhooks—it simply adds `DocumentTask` objects to the queue.
+
+Similarly, the processor pool continues pulling `DocumentTask` objects from the queue, generating embeddings, and updating Qdrant. Processors don't know or care whether a task came from the scanner or a webhook.
+
+This design keeps concerns separated: webhooks and scanner are independent producers, processors are independent consumers, and the queue mediates between them.
+
+### Configuration
+
+A new optional environment variable controls webhook authentication:
+
+```bash
+# Optional: Shared secret for webhook authentication
+# If set, webhooks must include "Authorization: Bearer <secret>" header
+# If unset, no authentication is required (useful for local development)
+WEBHOOK_SECRET=<generate-random-secret>
+```
+
+The webhook endpoint is automatically available at `/webhooks/nextcloud` when the MCP server starts. No feature flags or additional configuration needed—if Nextcloud sends webhooks to this endpoint, they will be processed.
+
+**Reducing Polling Frequency**: Administrators who configure webhooks may want to reduce polling frequency to minimize API load while maintaining safety reconciliation scans:
+
+```bash
+# Increase scan interval from 1 hour (default) to 24 hours
+VECTOR_SYNC_SCAN_INTERVAL=86400
+```
+
+This is a manual configuration decision, not automatic—the scanner doesn't adapt based on webhook availability.
+
+### Webhook Event Mapping
+
+The webhook handler maps Nextcloud events to document types:
+
+| Nextcloud Event | Document Type | Operation |
+|----------------|---------------|-----------|
+| `NodeCreatedEvent` (path: `*/files/*.md`) | `note` | `index` |
+| `NodeWrittenEvent` (path: `*/files/*.md`) | `note` | `index` |
+| `NodeDeletedEvent` (path: `*/files/*.md`) | `note` | `delete` |
+| `CalendarObjectCreatedEvent` | `calendar_event` | `index` |
+| `CalendarObjectUpdatedEvent` | `calendar_event` | `index` |
+| `CalendarObjectDeletedEvent` | `calendar_event` | `delete` |
+| `RowAddedEvent` | `table_row` | `index` |
+| `RowUpdatedEvent` | `table_row` | `index` |
+| `RowDeletedEvent` | `table_row` | `delete` |
+
+Path filters in webhook registration ensure only relevant files trigger notifications (e.g., exclude `.jpg`, `.mp4` for file events).
+
+### Administrator Setup
+
+Administrators who want to enable webhooks:
+
+1. **Enable webhook_listeners app** in Nextcloud: `occ app:enable webhook_listeners`
+2. **Register webhook endpoints** using Nextcloud's OCS API or admin UI:
+   - Endpoint: `https://<mcp-server-host>:<port>/webhooks/nextcloud`
+   - Events: File created/updated/deleted, Calendar object events, Table row events
+   - Filters: Exclude non-content files (images, videos), system directories
+   - Optional: Configure `Authorization: Bearer <WEBHOOK_SECRET>` header
+3. **Optionally reduce scanner frequency**: Set `VECTOR_SYNC_SCAN_INTERVAL=86400` (24 hours)
+4. **Set up webhook workers** (optional): Configure dedicated background job workers for low-latency delivery
+
+Existing deployments continue using polling without any changes. Webhooks are purely additive.
+
+## Consequences
+
+### Benefits
+
+**Reduced Latency**: With webhooks configured, content changes appear in semantic search within seconds to minutes (depending on Nextcloud background job configuration) instead of up to 1 hour. Queries like "What meetings do I have today?" reflect recent calendar updates.
+
+**Lower API Load**: Administrators who configure webhooks can reduce scanner frequency (e.g., 24-hour intervals), eliminating most polling API calls while maintaining safety reconciliation scans. This significantly reduces load on Nextcloud servers.
+
+**Better Scalability**: Webhooks scale better than polling as content volume grows. The system only processes changed documents instead of checking all documents every hour.
+
+**Simple Architecture**: The webhook endpoint is just another producer feeding the existing processor queue. No changes to scanner, processors, or queue management—webhooks integrate cleanly into the existing architecture.
+
+**Improved User Experience**: Lower-latency semantic search feels more responsive and accurate, especially for time-sensitive queries about recent changes.
+
+### Drawbacks
+
+**Manual Configuration**: Administrators must configure webhooks outside the MCP server using Nextcloud's admin tools. This adds setup complexity compared to the zero-configuration polling approach.
+
+**Deployment Requirements**: Webhooks require the MCP server to be reachable from Nextcloud via HTTP(S). Deployments behind NAT or with restrictive firewalls may not support webhooks without additional networking configuration.
+
+**Asynchronous Delivery**: Nextcloud processes webhooks via background jobs, introducing delivery latency (typically seconds to minutes). The exact latency depends on background job worker configuration and system load.
+
+**Testing Complexity**: Integration tests cannot rely on immediate webhook delivery due to asynchronous background job processing. Tests must either poll for results or mock webhook delivery directly.
+
+**New Failure Modes**: Webhook endpoint downtime, network issues between Nextcloud and MCP server, webhook notification floods from bulk operations. The system must handle these gracefully.
+
+**Version Dependencies**: The webhook_listeners app requires Nextcloud 30+. Older versions continue using polling exclusively.
+
+### Monitoring and Observability
+
+New metrics track webhook performance:
+
+- `webhook_notifications_received_total{event_type}`: Count of webhook notifications by event type
+- `webhook_processing_duration_seconds{event_type}`: Webhook handler latency
+- `webhook_errors_total{error_type}`: Failed webhook processing by error type (auth failure, parse error, queue full)
+
+Logs include:
+- Successful webhook processing: `Queued document from webhook: DocumentTask(...)`
+- Webhook authentication failures: `Webhook authentication failed`
+- Parse errors: `Failed to parse webhook payload: ...`
+- Unsupported events: `Ignoring webhook for unsupported event: ...`
+
+### Security Considerations
+
+**Optional Authentication**: When `WEBHOOK_SECRET` is configured, webhook requests must include `Authorization: Bearer <WEBHOOK_SECRET>` header. The server validates this before processing to prevent unauthorized document queueing. For local development, authentication can be disabled by leaving `WEBHOOK_SECRET` unset.
+
+**Payload Validation**: Webhook payloads are parsed and validated against expected schemas. Malformed payloads are rejected with 400 Bad Request responses.
+
+**No Scope Enforcement**: Unlike MCP tools, webhooks do not enforce progressive consent or check if users have enabled semantic search. Webhooks queue all document changes—administrators control which events trigger webhooks via Nextcloud filters. This keeps the webhook endpoint simple and stateless.
+
+### Testing Strategy
+
+**Unit Tests**: Test webhook handler logic, event parsing, and authentication validation using mocked payloads:
+
+```python
+async def test_webhook_endpoint_parses_note_created_event():
+    """Unit test: webhook endpoint extracts DocumentTask from note created event."""
+    payload = {
+        "user": {"uid": "alice"},
+        "time": 1704067200,
+        "event": {
+            "class": "OCP\\Files\\Events\\Node\\NodeCreatedEvent",
+            "node": {"id": "123", "path": "/alice/files/test.md"}
+        }
+    }
+    # Mock send_stream and verify DocumentTask is queued
+    ...
+```
+
+**Integration Tests (Without Real Webhooks)**: Since Nextcloud processes webhooks asynchronously via background jobs, integration tests should NOT rely on triggering real Nextcloud events and waiting for webhook delivery. Instead, tests should:
+
+1. **Mock webhook delivery**: POST webhook payloads directly to the `/webhooks/nextcloud` endpoint
+2. **Verify processing**: Check that documents are queued and eventually appear in Qdrant
+3. **Test authentication**: Verify requests without valid auth header are rejected (when `WEBHOOK_SECRET` is set)
+
+```python
+async def test_webhook_integration_mocked_delivery():
+    """Integration test: webhook handler queues document for processing."""
+    # POST webhook payload directly to endpoint (bypass Nextcloud)
+    response = await client.post("/webhooks/nextcloud", json=note_created_payload)
+    assert response.status_code == 200
+
+    # Wait for processor to handle document
+    await asyncio.sleep(2)
+
+    # Verify document appears in Qdrant
+    results = await qdrant_client.scroll(...)
+    assert len(results[0]) > 0
+```
+
+**Manual Testing (Real Webhooks)**: For end-to-end validation with real Nextcloud webhook delivery:
+
+1. Register webhook via OCS API or `NextcloudClient.register_webhook()` helper
+2. Configure webhook background job workers for low-latency delivery
+3. Trigger Nextcloud events (create note, add calendar event)
+4. Monitor MCP server logs for webhook delivery
+5. Verify documents appear in Qdrant after background job processing
+
+**Failure Mode Tests**:
+- Invalid authentication: Verify 401 response when auth header is missing/incorrect
+- Malformed payload: Verify 400 response for invalid JSON or missing required fields
+- Unsupported event types: Verify graceful handling (ignored, not error)
+- Queue full: Verify 500 response with appropriate error message
+
+### Future Enhancements
+
+**Batch Processing**: Group multiple webhook notifications within a short time window (e.g., 5 seconds) into a single batch before queueing. This reduces processor overhead during bulk operations like importing calendars.
+
+**Webhook Payload Optimization**: For large documents, Nextcloud could be configured to send minimal metadata in webhooks (just user_id, doc_id, doc_type), with processors fetching full content lazily. This reduces webhook payload size and network bandwidth.
+
+**Deduplication Window**: Track recently processed documents (last 5 minutes) to avoid redundant work when webhooks and scanner both detect the same change. The processor can check a simple in-memory cache before fetching document content.
+
+## Appendix A: Manual Webhook Testing Results (2025-01-11)
+
+### Testing Summary
+
+Manual validation of Nextcloud webhook schemas and behavior confirmed that webhooks work as documented with several important findings for implementation. **5 out of 6** webhook types were successfully captured and validated.
+
+**Test Environment:**
+- Nextcloud 30+ (Docker compose)
+- webhook_listeners app enabled
+- Test endpoint: `http://mcp:8000/webhooks/nextcloud`
+- Background webhook worker running (60s timeout)
+
+**Results:**
+- ✅ NodeCreatedEvent (file creation)
+- ✅ NodeWrittenEvent (file update)
+- ✅ NodeDeletedEvent (file deletion)
+- ✅ CalendarObjectCreatedEvent
+- ✅ CalendarObjectUpdatedEvent
+- ❌ CalendarObjectDeletedEvent (webhook did not fire - potential Nextcloud bug)
+
+### Critical Implementation Findings
+
+#### 1. Deletion Events Lack `node.id` Field
+
+**Finding:** `NodeDeletedEvent` payloads do NOT include `event.node.id`, only `event.node.path`.
+
+**Example:**
+```json
+{
+  "user": {"uid": "admin", "displayName": "admin"},
+  "time": 1762851093,
+  "event": {
+    "class": "OCP\\Files\\Events\\Node\\NodeDeletedEvent",
+    "node": {
+      "path": "/admin/files/Notes/Webhooks/Webhook Test Note.md"
+      // NOTE: No "id" field present
+    }
+  }
+}
+```
+
+**Impact:** The event parser in this ADR's example code assumes `event_data["node"]["id"]` exists for all file events. This will fail for deletions.
+
+**Update (2025-11-11):** Nextcloud maintainer clarified that `BeforeNodeDeletedEvent` should be used instead of `NodeDeletedEvent` to access `node.id` before the file is deleted. See [issue #56371](https://github.com/nextcloud/server/issues/56371#issuecomment-2470896634).
+
+> "Try using the `BeforeNodeDeletedEvent`. The `id` should still be available at that time. The reason `id` is not in `NodeDeletedEvent` is because the file is effectively guaranteed to be gone and, in turn, so is the FileInfo."
+> — Josh Richards, Nextcloud maintainer
+
+**Recommended Solution:** Use `OCP\Files\Events\Node\BeforeNodeDeletedEvent` for file deletion webhooks instead of `NodeDeletedEvent`.
+
+**Alternative Fix (if using NodeDeletedEvent):** Check for `id` existence and fall back to path-based identification:
+
+```python
+def extract_document_task(event_class: str, payload: dict) -> DocumentTask | None:
+    user_id = payload["user"]["uid"]
+    event_data = payload["event"]
+
+    # File deletion events - NO node.id field
+    if "NodeDeletedEvent" in event_class:
+        path = event_data["node"]["path"]
+        if not path.endswith(".md"):
+            return None
+        # Use path-based ID since node.id is unavailable
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=f"path:{path}",  # Prefix to distinguish from numeric IDs
+            doc_type="note",
+            operation="delete",
+            modified_at=payload["time"],
+        )
+
+    # File creation/update events - node.id exists
+    elif "NodeCreatedEvent" in event_class or "NodeWrittenEvent" in event_class:
+        path = event_data["node"]["path"]
+        if not path.endswith(".md"):
+            return None
+
+        # Check if 'id' exists (should, but be defensive)
+        node_id = event_data["node"].get("id")
+        if not node_id:
+            # Fallback for missing ID
+            node_id = f"path:{path}"
+
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=str(node_id),
+            doc_type="note",
+            operation="index",
+            modified_at=payload["time"],
+        )
+```
+
+**Qdrant Deletion Strategy:** When deleting by path-based ID, search Qdrant for documents with matching path metadata:
+
+```python
+async def delete_document_by_path(user_id: str, path: str):
+    """Delete document from Qdrant using path (when ID unavailable)."""
+    points = await qdrant.scroll(
+        collection_name=collection,
+        scroll_filter=Filter(must=[
+            FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+            FieldCondition(key="metadata.path", match=MatchValue(value=path)),
+        ]),
+    )
+    # Delete found points...
+```
+
+#### 2. Multiple Webhooks Per Operation
+
+**Finding:** Creating a single note triggers 3-5 separate webhook events in rapid succession:
+
+1. `NodeCreatedEvent` for parent folder (if new)
+2. `NodeWrittenEvent` for parent folder
+3. `NodeCreatedEvent` for the note file
+4. `NodeWrittenEvent` for the note file (sometimes fires twice)
+
+**Impact:** Without deduplication, the processor will fetch and index the same note multiple times within seconds, wasting compute and API quota.
+
+**Solution:** The processor queue should be idempotent. If the same document is queued multiple times, only the latest version needs processing. Implementation options:
+
+1. **Queue-level deduplication:** Before adding to queue, check if a task for the same `(user_id, doc_id)` is already pending. Replace the existing task instead of adding duplicate.
+
+2. **Processor-level deduplication:** Track recently processed documents in a short-lived cache (5 minutes). If a document was just processed, skip redundant fetch unless the `modified_at` timestamp is newer.
+
+3. **Accept duplicates:** Let the processor handle duplicates naturally. Qdrant upserts are idempotent—reindexing with identical content is harmless but wasteful.
+
+**Recommendation:** Implement queue-level deduplication by maintaining a map of pending tasks and replacing duplicates with newer timestamps.
+
+#### 3. Type Discrepancy in `node.id`
+
+**Finding:** Nextcloud documentation specifies `node.id` as type `string`, but actual payloads return `int`:
+
+```json
+"node": {
+  "id": 437,  // integer, not "437"
+  "path": "/admin/files/Notes/Webhooks/Webhook Test Note.md"
+}
+```
+
+**Impact:** Code that assumes `node.id` is always a string will work but may cause type confusion in strongly-typed languages.
+
+**Solution:** Explicitly convert to string when extracting: `doc_id=str(event_data["node"]["id"])`
+
+#### 4. Calendar Events Have Different ID Field Path
+
+**Finding:** Calendar events store the document ID in a different location than file events:
+
+- **File events:** `event.node.id`
+- **Calendar events:** `event.objectData.id`
+
+**Impact:** Event parser must handle different field paths for different event types. The example code in this ADR correctly shows this difference.
+
+**Calendar Event Deletion:** Calendar deletion webhooks did NOT fire during testing. This may be a Nextcloud bug or require specific configuration (e.g., trash bin enabled). Until resolved, calendar deletions will only be detected via periodic scanner runs.
+
+#### 5. Rich Metadata in Calendar Webhooks
+
+**Finding:** Calendar webhook payloads include extensive metadata not present in file webhooks:
+
+```json
+{
+  "event": {
+    "calendarId": 1,
+    "calendarData": {
+      "id": 1,
+      "uri": "personal",
+      "{http://calendarserver.org/ns/}getctag": "...",
+      "{http://sabredav.org/ns}sync-token": 21,
+      // ... many calendar-level properties
+    },
+    "objectData": {
+      "id": 3,
+      "uri": "webhook-test-event-001.ics",
+      "lastmodified": 1762851169,
+      "etag": "\"2b937b7d77dc83c77329dfdb210ba9d0\"",
+      "calendarid": 1,
+      "size": 297,
+      "component": "vevent",
+      "classification": 0,
+      "uid": "webhook-test-event-001@nextcloud",
+      "calendardata": "BEGIN:VCALENDAR\r\nVERSION:2.0\r\n...",  // Full iCal
+      "{http://nextcloud.com/ns}deleted-at": null
+    },
+    "shares": []  // Array of sharing info
+  }
+}
+```
+
+**Opportunity:** The full iCal content is available in `objectData.calendardata`. The processor could extract metadata directly from the webhook payload instead of making an additional CalDAV request, reducing API load.
+
+### Updated Event Mapping
+
+Based on testing, the actual webhook behavior:
+
+| Nextcloud Event | Fires? | `node.id`/`objectData.id` Present? | Notes |
+|----------------|--------|-------------------------------------|-------|
+| `NodeCreatedEvent` | ✅ Yes | ✅ Yes (`int`) | Fires for folders too |
+| `NodeWrittenEvent` | ✅ Yes | ✅ Yes (`int`) | Fires 1-2x per operation |
+| `NodeDeletedEvent` | ✅ Yes | ❌ **NO** (only `path`) | Critical difference |
+| `CalendarObjectCreatedEvent` | ✅ Yes | ✅ Yes (`objectData.id`) | Full iCal included |
+| `CalendarObjectUpdatedEvent` | ✅ Yes | ✅ Yes (`objectData.id`) | Full iCal included |
+| `CalendarObjectDeletedEvent` | ❌ **DID NOT FIRE** | ❓ Unknown | Possible Nextcloud bug |
+
+### Recommended Implementation Changes
+
+The webhook handler code in this ADR requires these modifications:
+
+1. **Handle missing `node.id` in deletions** (see code example in Finding #1)
+2. **Add deduplication logic** to prevent redundant processing from multiple webhooks per operation
+3. **Validate field existence** before accessing nested properties (`get()` with defaults)
+4. **Log unsupported events** at DEBUG level (not WARNING) to avoid log noise
+5. **Add calendar deletion fallback:** Since webhook unreliable, calendar deletions rely on scanner reconciliation
+6. **Consider payload optimization:** Extract calendar metadata from webhook payload to reduce CalDAV API calls
+
+### Testing Implications
+
+**Integration Test Strategy:**
+
+The asynchronous nature of Nextcloud webhooks makes real webhook delivery unreliable for automated tests:
+
+- ✅ **DO:** POST webhook payloads directly to `/webhooks/nextcloud` endpoint in tests
+- ❌ **DON'T:** Trigger Nextcloud events and wait for webhook delivery
+- ✅ **DO:** Test authentication, payload parsing, and queue integration with mocked payloads
+- ❌ **DON'T:** Assume webhooks fire immediately or reliably
+
+**Manual Testing Required:**
+- Real webhook delivery latency (depends on background job workers)
+- Calendar deletion webhook behavior (confirm bug or configuration issue)
+- Behavior under high-frequency updates (bulk operations)
+- Network failure handling (Nextcloud can't reach MCP server)
+
+### Complete Tested Payload Examples
+
+See `webhook-testing-findings.md` in the repository root for:
+- Complete JSON payloads for all tested events
+- Detailed schema validation results
+- Additional edge cases and observations
+- Screenshots of webhook logs
+
+## References
+
+- ADR-007: Background Vector Database Synchronization (polling architecture)
+- Nextcloud Documentation: `~/Software/documentation/admin_manual/webhook_listeners/index.rst`
+- Nextcloud OCS API: Webhook registration endpoint
+- Current scanner implementation: `nextcloud_mcp_server/vector/scanner.py:37`
+- Webhook Testing Report: `webhook-testing-findings.md` (2025-01-11)
@@ -0,0 +1,943 @@
+# ADR-011: Improving Semantic Search Quality Through Better Chunking and Embeddings
+
+**Status**: Partially Implemented (Chunking Complete, Embeddings Pending)
+**Date**: 2025-11-12
+**Implementation Date**: 2025-11-18 (Chunking)
+**Authors**: Development Team
+**Related**: ADR-003 (Vector Database Architecture), ADR-008 (MCP Sampling for RAG)
+
+## Context
+
+The semantic search implementation provides document retrieval across Nextcloud apps using vector embeddings. Production usage has revealed that **the system frequently misses relevant documents** (recall problem).
+
+Root cause analysis identifies two fundamental issues:
+
+### 1. Poor Chunking Strategy
+
+**Current Implementation** (`nextcloud_mcp_server/vector/document_chunker.py:36`):
+```python
+words = content.split()  # Naive whitespace splitting
+chunk_size = 512  # words
+overlap = 50  # words
+chunks = [words[i:i+chunk_size] for i in range(0, len(words), chunk_size-overlap)]
+```
+
+**Problems**:
+- **Breaks semantic boundaries**: Splits mid-sentence, mid-paragraph, mid-thought
+- **Loses context**: "The meeting discussed budget. We decided to..." becomes two disconnected chunks
+- **Poor retrieval**: Relevant content split across chunks with low individual relevance scores
+- **No structure awareness**: Ignores markdown headers, lists, code blocks
+
+**Evidence**:
+- Documents with relevant content in middle sections score poorly (content split across 3+ chunks)
+- Multi-sentence concepts (spanning 60-100 words) are fragmented
+- Search for "budget planning process" misses documents where these words appear in adjacent sentences but different chunks
+
+### 2. Suboptimal Embedding Model
+
+**Current Implementation** (`nextcloud_mcp_server/embedding/ollama_provider.py:33`):
+```python
+_model = "nomic-embed-text"  # 768 dimensions
+_dimension = 768  # Hardcoded
+```
+
+**Problems**:
+- **Model selection**: `nomic-embed-text` is general-purpose, not optimized for our use case
+- **No benchmarking**: Selected without comparative evaluation
+- **Dimensionality**: 768-dim may be insufficient for nuanced semantic distinctions
+- **No domain adaptation**: Model not tuned for Nextcloud content (notes, calendar, deck cards)
+
+**Evidence**:
+- Synonymous queries return different results ("meeting notes" vs. "discussion summary")
+- Domain-specific terms poorly represented ("standup", "retrospective", "OKRs")
+- Cross-lingual content (if present) not well supported
+
+### Current Performance
+
+**Baseline Metrics** (100-document test corpus, 50 queries):
+- **Recall@10**: ~52% (misses 48% of relevant documents)
+- **Precision@10**: ~78% (acceptable but room for improvement)
+- **MRR**: 0.58 (relevant docs often not in top positions)
+- **Zero-result queries**: 18% (completely missing relevant content)
+
+## Decision Drivers
+
+1. **Address Root Causes**: Fix fundamental issues (chunking, embeddings) before adding complexity (reranking, hybrid search)
+2. **Measurable Impact**: Target 40-60% improvement in recall through chunking/embedding alone
+3. **Independence**: Improvements should be orthogonal to future enhancements (reranking, GraphRAG)
+4. **Cost Efficiency**: Minimize infrastructure and API costs
+5. **Reindexing Acceptable**: One-time reindex cost justified by long-term quality improvement
+
+## Options Considered
+
+### Chunking Strategies
+
+#### Option C1: Semantic Sentence-Aware Chunking (RECOMMENDED)
+
+**Description**: Respect sentence boundaries while maintaining target chunk size
+
+**Implementation**:
+```python
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+splitter = RecursiveCharacterTextSplitter(
+    chunk_size=2048,  # ~512 words in characters
+    chunk_overlap=200,  # ~50 words in characters
+    separators=["\n\n", "\n", ". ", "! ", "? ", "; ", ": ", ", ", " "],
+    length_function=len,
+)
+```
+
+**How it works**:
+1. Try splitting by paragraphs (`\n\n`)
+2. If chunks too large, split by sentences (`. `, `! `, `? `)
+3. If still too large, split by clauses (`;`, `:`)
+4. Last resort: split by words
+
+**Pros**:
+- ✅ Preserves semantic boundaries (never breaks mid-sentence)
+- ✅ Maintains context coherence within chunks
+- ✅ Simple implementation (langchain library)
+- ✅ Configurable separators for different content types
+- ✅ Proven approach (used by major RAG systems)
+
+**Cons**:
+- ❌ Variable chunk sizes (not exactly 512 words, but close)
+- ❌ Adds dependency (langchain)
+- ❌ Slightly slower than naive splitting (~10-20ms per document)
+
+**Expected Impact**: 20-30% recall improvement
+
+#### Option C2: Hierarchical Context-Preserving Chunks
+
+**Description**: Create overlapping parent/child chunks
+
+**Structure**:
+```
+Document → Large parent chunks (1024 words) → Small child chunks (256 words)
+          ↓                                    ↓
+   Stored in Qdrant                       Searched first
+                                          Return parent context
+```
+
+**Implementation**:
+```python
+# Generate child chunks (searched)
+child_chunks = splitter.split_text(content, chunk_size=1024)
+
+# Generate parent chunks (context)
+parent_chunks = splitter.split_text(content, chunk_size=4096)
+
+# Store both with parent-child relationships
+for child_idx, child in enumerate(child_chunks):
+    parent_idx = find_parent(child_idx)
+    store_vector(
+        vector=embed(child),
+        payload={
+            "chunk": child,
+            "parent_chunk": parent_chunks[parent_idx],
+            "chunk_type": "child"
+        }
+    )
+```
+
+**Pros**:
+- ✅ Best of both worlds: precise matching + full context
+- ✅ Handles multi-hop information needs
+- ✅ Better for long documents (> 1000 words)
+
+**Cons**:
+- ❌ 2x storage (parent + child chunks)
+- ❌ More complex implementation
+- ❌ Higher indexing time (embed twice)
+- ❌ Query complexity (retrieve child, return parent)
+
+**Expected Impact**: 35-45% recall improvement (diminishing returns vs. complexity)
+
+**Verdict**: ⚠️ Consider only if Option C1 insufficient
+
+#### Option C3: Document Structure-Aware Chunking
+
+**Description**: Parse markdown/document structure before chunking
+
+**Implementation**:
+```python
+import mistune  # Markdown parser
+
+def structure_aware_chunk(markdown_content: str) -> list[str]:
+    ast = mistune.create_markdown(renderer='ast')(markdown_content)
+
+    chunks = []
+    for node in ast:
+        if node['type'] == 'heading':
+            # Start new chunk at each header
+            current_chunk = node['children'][0]['raw']
+        elif node['type'] == 'paragraph':
+            current_chunk += "\n" + node['children'][0]['raw']
+            if len(current_chunk) > 2048:
+                chunks.append(current_chunk)
+                current_chunk = ""
+
+    return chunks
+```
+
+**Pros**:
+- ✅ Respects document logical structure
+- ✅ Headers provide context for chunks
+- ✅ Works well for structured notes (documentation, meeting notes with sections)
+
+**Cons**:
+- ❌ Complex implementation (parser, AST traversal)
+- ❌ Markdown-specific (doesn't help calendar events, deck cards)
+- ❌ Variable chunk sizes (some sections very short/long)
+- ❌ Breaks for unstructured content
+
+**Expected Impact**: 15-25% improvement for structured content only
+
+**Verdict**: ⚠️ Future enhancement after Option C1
+
+#### Option C4: Fixed Sliding Window (Current Baseline)
+
+**Description**: Current naive word-based splitting
+
+**Verdict**: ❌ Superseded by Option C1
+
+### Embedding Model Strategies
+
+#### Option E1: Upgrade to Better General-Purpose Model (RECOMMENDED)
+
+**Description**: Switch to state-of-the-art embedding model
+
+**Candidates**:
+
+| Model | Dimensions | MTEB Score | Pros | Cons |
+|-------|-----------|------------|------|------|
+| **mxbai-embed-large** | 1024 | 64.68 | Best performance, good balance | Larger (slower) |
+| **nomic-embed-text-v1.5** | 768 | 62.39 | Upgraded version of current | Incremental improvement |
+| **bge-large-en-v1.5** | 1024 | 64.23 | Excellent for English | Not multilingual |
+| **nomic-embed-text** (current) | 768 | 60.10 | Baseline | Lower performance |
+
+**MTEB**: Massive Text Embedding Benchmark (higher = better semantic understanding)
+
+**Recommendation**: **mxbai-embed-large-v1**
+- Best MTEB score (64.68)
+- 1024 dimensions (richer semantic space)
+- Works well via Ollama
+- ~15-20% better retrieval quality in benchmarks
+
+**Implementation**:
+```python
+# config.py
+OLLAMA_EMBEDDING_MODEL = "mxbai-embed-large-v1"  # Changed from nomic-embed-text
+
+# ollama_provider.py
+async def get_dimension(self) -> int:
+    # Query Ollama for actual dimension instead of hardcoding
+    response = await self.client.post("/api/show", json={"name": self.model})
+    return response.json()["details"]["embedding_length"]
+```
+
+**Migration**:
+1. Deploy new model to Ollama
+2. Create new Qdrant collection (different dimension)
+3. Reindex all documents with new embeddings
+4. Swap collections atomically
+5. Delete old collection
+
+**Pros**:
+- ✅ Immediate quality improvement (15-20%)
+- ✅ Simple change (config + reindex)
+- ✅ No code complexity
+- ✅ Future-proof (state-of-the-art model)
+
+**Cons**:
+- ❌ Requires full reindex (2-4 hours for 1000 documents)
+- ❌ Larger model = slower embedding (~50ms vs. 30ms per chunk)
+- ❌ Higher dimensionality = more storage (~30% increase)
+
+**Expected Impact**: 15-25% recall improvement
+
+#### Option E2: Multi-Vector Embeddings (ColBERT-style)
+
+**Description**: Generate multiple embeddings per chunk (token-level)
+
+**Architecture**:
+```
+Chunk → Transformer → Token embeddings (e.g., 50 tokens × 128 dim) → Store all
+Query → Transformer → Token embeddings → MaxSim(query_tokens, doc_tokens)
+```
+
+**MaxSim scoring**:
+```python
+def maxsim_score(query_embeddings, doc_embeddings):
+    # For each query token, find max similarity with any doc token
+    scores = []
+    for q_emb in query_embeddings:
+        max_sim = max(cosine_similarity(q_emb, d_emb) for d_emb in doc_embeddings)
+        scores.append(max_sim)
+    return sum(scores)
+```
+
+**Pros**:
+- ✅ Best retrieval quality (state-of-the-art results)
+- ✅ Fine-grained matching (token-level)
+- ✅ Handles partial matches better
+
+**Cons**:
+- ❌ **50-100x storage increase** (50 vectors per chunk vs. 1)
+- ❌ **Slower search** (compute MaxSim for each candidate)
+- ❌ **Complex implementation** (custom scoring, storage schema)
+- ❌ **Requires specialized model** (ColBERTv2, not available in Ollama)
+
+**Expected Impact**: 40-50% improvement, but at very high cost
+
+**Verdict**: ❌ Too complex, too expensive for marginal gain over E1+C1
+
+#### Option E3: Fine-Tuned Domain-Specific Model
+
+**Description**: Fine-tune embedding model on Nextcloud corpus
+
+**Process**:
+1. Collect training data (query-document pairs)
+2. Fine-tune base model (e.g., `nomic-embed-text`) on domain data
+3. Deploy fine-tuned model via Ollama
+4. Reindex with fine-tuned embeddings
+
+**Training data needed**:
+- 1,000+ query-document pairs
+- Labeled relevance (positive/negative examples)
+- Representative of real usage
+
+**Pros**:
+- ✅ Optimized for specific content (notes, calendar, deck)
+- ✅ Better handling of domain terminology
+- ✅ Highest potential quality improvement (30-40%)
+
+**Cons**:
+- ❌ **Requires training data** (expensive to collect)
+- ❌ **GPU infrastructure** needed for fine-tuning
+- ❌ **Expertise required** (ML/NLP knowledge)
+- ❌ **Maintenance burden** (retrain as corpus evolves)
+- ❌ **Time investment**: 2-4 weeks initial setup
+
+**Expected Impact**: 30-40% improvement, but high cost
+
+**Verdict**: ⚠️ Consider only if E1+C1 insufficient AND have training data
+
+#### Option E4: Ensemble Embeddings
+
+**Description**: Generate embeddings with multiple models, combine scores
+
+**Implementation**:
+```python
+models = ["mxbai-embed-large-v1", "bge-large-en-v1.5"]
+
+# Index
+embeddings = [await embed(chunk, model) for model in models]
+store_multi_vector(embeddings)
+
+# Search
+query_embeddings = [await embed(query, model) for model in models]
+scores = [search(q_emb, model) for q_emb, model in zip(query_embeddings, models)]
+combined_score = 0.5 * scores[0] + 0.5 * scores[1]
+```
+
+**Pros**:
+- ✅ Robust to individual model weaknesses
+- ✅ Better coverage of semantic space
+
+**Cons**:
+- ❌ 2x storage and compute
+- ❌ Complex scoring and fusion
+- ❌ Marginal improvement (~5-10%) over single best model
+
+**Expected Impact**: 5-10% over best single model
+
+**Verdict**: ❌ Not worth complexity
+
+### Combined Strategies
+
+#### Option D1: Best Chunking + Best Embedding (RECOMMENDED)
+
+**Combination**: Option C1 (Semantic Chunking) + Option E1 (mxbai-embed-large-v1)
+
+**Expected Impact**:
+- Chunking: +20-30% recall
+- Embedding: +15-25% recall
+- **Combined**: +35-55% recall improvement (not strictly additive, but significant)
+
+**Cost**:
+- Development: 1-2 days
+- Reindex: 2-4 hours (one-time)
+- Ongoing: None (same infrastructure)
+
+**Pros**:
+- ✅ Addresses both root causes
+- ✅ Orthogonal improvements (chunking + embedding)
+- ✅ Simple implementation
+- ✅ No new infrastructure
+- ✅ Future-proof foundation for additional enhancements (reranking, hybrid search)
+
+**Cons**:
+- ❌ Requires full reindex (manageable)
+- ❌ Slightly higher storage (1024 vs. 768 dim)
+
+**Verdict**: ✅ **RECOMMENDED**
+
+## Decision
+
+**Adopt Option D1: Semantic Chunking + Upgraded Embedding Model**
+
+Implement both improvements together to maximize recall improvement:
+
+### 1. Semantic Sentence-Aware Chunking
+
+**Changes**:
+- Replace naive word splitting with `RecursiveCharacterTextSplitter`
+- Preserve sentence boundaries, paragraph structure
+- Maintain similar chunk sizes (~512 words / 2048 characters)
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/vector/document_chunker.py
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+class DocumentChunker:
+    """Chunk documents into semantically coherent pieces."""
+
+    def __init__(
+        self,
+        chunk_size: int = 2048,  # Characters, not words
+        chunk_overlap: int = 200,  # Characters, not words
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+
+        self.splitter = RecursiveCharacterTextSplitter(
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            separators=[
+                "\n\n",  # Paragraphs (highest priority)
+                "\n",    # Lines
+                ". ",    # Sentences
+                "! ",
+                "? ",
+                "; ",    # Clauses
+                ": ",
+                ", ",    # Phrases
+                " ",     # Words (last resort)
+            ],
+            length_function=len,
+            is_separator_regex=False,
+        )
+
+    def chunk_text(self, content: str) -> list[str]:
+        """
+        Chunk text while preserving semantic boundaries.
+
+        Args:
+            content: Full document text
+
+        Returns:
+            List of text chunks, each ending at a semantic boundary
+        """
+        if not content:
+            return []
+
+        # Use RecursiveCharacterTextSplitter for semantic boundaries
+        chunks = self.splitter.split_text(content)
+
+        return chunks
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old (word-based)
+DOCUMENT_CHUNK_SIZE: int = 512  # words
+DOCUMENT_CHUNK_OVERLAP: int = 50  # words
+
+# New (character-based, more precise)
+DOCUMENT_CHUNK_SIZE: int = 2048  # characters (~512 words)
+DOCUMENT_CHUNK_OVERLAP: int = 200  # characters (~50 words)
+```
+
+**Dependency** (`pyproject.toml`):
+```toml
+[project]
+dependencies = [
+    # ... existing dependencies
+    "langchain-text-splitters>=0.2.0",
+]
+```
+
+### 2. Upgrade Embedding Model
+
+**Changes**:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Dynamic dimension detection (query Ollama instead of hardcoding)
+- Create new Qdrant collection for new dimensions
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/embedding/ollama_provider.py
+
+class OllamaEmbeddingProvider(EmbeddingProvider):
+    def __init__(self, base_url: str, model: str, verify_ssl: bool = True):
+        self.base_url = base_url
+        self.model = model
+        self._dimension: int | None = None  # Changed: query dynamically
+        self.client = httpx.AsyncClient(base_url=base_url, verify=verify_ssl)
+
+    async def dimension(self) -> int:
+        """Get embedding dimension from Ollama API."""
+        if self._dimension is None:
+            try:
+                response = await self.client.post(
+                    "/api/show",
+                    json={"name": self.model},
+                    timeout=10.0,
+                )
+                response.raise_for_status()
+                info = response.json()
+                self._dimension = info.get("details", {}).get("embedding_length")
+
+                if self._dimension is None:
+                    # Fallback: generate test embedding to detect dimension
+                    test_emb = await self.embed("test")
+                    self._dimension = len(test_emb)
+
+            except Exception as e:
+                logger.warning(f"Failed to get dimension from Ollama: {e}, using fallback")
+                # Fallback dimensions by model name
+                if "mxbai-embed-large" in self.model:
+                    self._dimension = 1024
+                elif "nomic-embed-text" in self.model:
+                    self._dimension = 768
+                else:
+                    self._dimension = 768  # Default
+
+        return self._dimension
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old
+OLLAMA_EMBEDDING_MODEL: str = "nomic-embed-text"
+
+# New
+OLLAMA_EMBEDDING_MODEL: str = "mxbai-embed-large-v1"
+```
+
+**Environment Variable**:
+```bash
+OLLAMA_EMBEDDING_MODEL=mxbai-embed-large-v1
+```
+
+### 3. Migration Strategy
+
+**Reindexing Process**:
+
+```python
+# nextcloud_mcp_server/vector/migration.py
+
+async def migrate_to_new_embeddings():
+    """
+    Migrate from old embeddings to new embeddings.
+
+    Process:
+    1. Create new collection with new dimension
+    2. Reindex all documents with new embeddings
+    3. Atomic swap (update collection name in config)
+    4. Delete old collection
+    """
+    old_collection = "nextcloud_content"
+    new_collection = "nextcloud_content_v2"
+
+    # 1. Create new collection
+    await qdrant_client.create_collection(
+        collection_name=new_collection,
+        vectors_config=VectorParams(
+            size=1024,  # mxbai-embed-large-v1 dimension
+            distance=Distance.COSINE,
+        ),
+    )
+
+    # 2. Reindex all documents
+    logger.info("Starting reindex with new embeddings...")
+    scanner = VectorScanner(...)
+    processor = VectorProcessor(collection_name=new_collection, ...)
+
+    await scanner.scan_all()  # Rescans and re-embeds all documents
+
+    # 3. Wait for completion
+    while True:
+        status = await get_sync_status()
+        if status.pending_documents == 0:
+            break
+        await asyncio.sleep(5)
+
+    # 4. Atomic swap
+    # Update config to point to new collection
+    # (or use collection alias in Qdrant)
+    await qdrant_client.update_collection_aliases(
+        change_aliases_operations=[
+            CreateAliasOperation(
+                create_alias=CreateAlias(
+                    collection_name=new_collection,
+                    alias_name="nextcloud_content"
+                )
+            )
+        ]
+    )
+
+    # 5. Verify new collection works
+    test_results = await run_benchmark_queries()
+    if test_results.recall < baseline_recall:
+        # Rollback
+        logger.error("New embeddings worse than baseline, rolling back")
+        await rollback_migration()
+        return False
+
+    # 6. Delete old collection
+    await qdrant_client.delete_collection(old_collection)
+    logger.info("Migration complete!")
+    return True
+```
+
+**Downtime Mitigation**:
+- Use Qdrant collection aliases for atomic swap
+- Reindex can happen in background
+- Only brief downtime during alias swap (~1s)
+
+**Rollback Plan**:
+- Keep old collection until validation complete
+- If new embeddings worse, swap alias back to old collection
+- No data loss
+
+### 4. Validation & Benchmarking
+
+**Before/After Comparison**:
+
+```python
+# tests/benchmarks/chunking_embedding_comparison.py
+
+async def benchmark_chunking_embeddings():
+    """
+    Compare old vs. new chunking and embeddings on test queries.
+    """
+    test_queries = load_benchmark_queries()  # 100 queries with known relevant docs
+
+    # Baseline (current)
+    baseline_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content",  # Old: nomic-embed-text, word chunks
+    )
+
+    # New implementation
+    new_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content_v2",  # New: mxbai-embed-large-v1, semantic chunks
+    )
+
+    # Compare metrics
+    comparison = {
+        "baseline": {
+            "recall@10": calculate_recall(baseline_results, k=10),
+            "precision@10": calculate_precision(baseline_results, k=10),
+            "mrr": calculate_mrr(baseline_results),
+            "zero_result_rate": calculate_zero_result_rate(baseline_results),
+        },
+        "new": {
+            "recall@10": calculate_recall(new_results, k=10),
+            "precision@10": calculate_precision(new_results, k=10),
+            "mrr": calculate_mrr(new_results),
+            "zero_result_rate": calculate_zero_result_rate(new_results),
+        },
+        "improvement": {
+            "recall_improvement": (new_recall - baseline_recall) / baseline_recall,
+            "precision_improvement": (new_precision - baseline_precision) / baseline_precision,
+        }
+    }
+
+    return comparison
+```
+
+**Success Criteria**:
+- **Recall@10**: Improve from ~52% to ≥75% (+40% improvement)
+- **Precision@10**: Maintain ≥75% (no degradation)
+- **MRR**: Improve from 0.58 to ≥0.70
+- **Zero-result rate**: Reduce from 18% to ≤10%
+- **Indexing time**: Maintain ≤10s per document
+
+**Validation Process**:
+1. Run benchmark on baseline (current implementation)
+2. Implement changes
+3. Run benchmark on new implementation
+4. Compare metrics
+5. If improvement ≥40%, proceed to production
+6. If improvement <40%, investigate and iterate
+
+## Implementation Timeline
+
+### Week 1: Development & Testing
+
+**Day 1-2: Chunking Implementation**
+- [ ] Add langchain-text-splitters dependency
+- [ ] Refactor `document_chunker.py`
+- [ ] Update configuration (character-based chunk sizes)
+- [ ] Write unit tests for semantic boundaries
+- [ ] Validate: Chunks never break mid-sentence
+
+**Day 3-4: Embedding Implementation**
+- [ ] Update `ollama_provider.py` with dynamic dimension detection
+- [ ] Update configuration (new model name)
+- [ ] Deploy `mxbai-embed-large-v1` to Ollama
+- [ ] Test embedding generation with new model
+- [ ] Validate: Embeddings are 1024-dim
+
+**Day 5: Migration Script**
+- [ ] Write migration script (collection creation, reindexing, alias swap)
+- [ ] Test migration on staging environment
+- [ ] Validate: No data loss, atomic swap works
+
+### Week 2: Reindexing & Validation
+
+**Day 1-2: Staging Reindex**
+- [ ] Run full reindex on staging environment
+- [ ] Monitor indexing performance
+- [ ] Validate: All documents indexed correctly
+
+**Day 3: Benchmarking**
+- [ ] Run benchmark queries on old collection (baseline)
+- [ ] Run benchmark queries on new collection
+- [ ] Compare metrics (recall, precision, MRR)
+- [ ] Validate: ≥40% recall improvement
+
+**Day 4: Production Reindex**
+- [ ] Schedule maintenance window (optional, can run in background)
+- [ ] Run migration script on production
+- [ ] Monitor reindexing progress
+- [ ] Atomic swap when complete
+
+**Day 5: Production Validation**
+- [ ] Monitor search quality metrics
+- [ ] Collect user feedback
+- [ ] Compare production metrics to staging
+- [ ] Rollback if issues detected
+
+## Cost Analysis
+
+### Development Cost
+- **Time**: 1-2 weeks (implementation + validation)
+- **Effort**: 40-60 hours @ $100/hour = $4,000 - $6,000
+
+### Infrastructure Cost
+- **Storage**: +30% (1024-dim vs. 768-dim)
+  - Example: 1,000 notes × 3 chunks × 1024 dim × 4 bytes = 12 MB (negligible)
+- **Compute**: +20% embedding time (50ms vs. 30ms per chunk)
+  - Amortized over batch indexing, minimal impact
+- **No new infrastructure**: Uses existing Ollama + Qdrant
+
+### Reindexing Cost (One-Time)
+- **Time**: 2-4 hours for 1,000 documents
+  - 1,000 docs × 3 chunks × 50ms = 150 seconds (~2.5 minutes embedding)
+  - + Ollama processing time + Qdrant insertion
+- **Downtime**: ~1 second (atomic alias swap)
+
+### Total Cost
+- **Initial**: $4,000 - $6,000 (development + testing)
+- **Ongoing**: $0 (no new infrastructure or API costs)
+
+### ROI
+- **Recall improvement**: +40-60% (finding relevant documents)
+- **User satisfaction**: Reduced zero-result queries (18% → 10%)
+- **Foundation**: Enables future enhancements (reranking, hybrid search)
+- **Cost per % improvement**: $100 - $150 (excellent ROI)
+
+## Consequences
+
+### Positive
+
+1. **Addresses Root Causes**: Fixes fundamental issues (chunking, embeddings) not symptoms
+2. **High Impact**: Expected 40-60% recall improvement from foundational changes
+3. **Future-Proof**: Creates solid foundation for future enhancements (reranking, hybrid search, GraphRAG)
+4. **Simple**: No architectural changes, no new infrastructure
+5. **Orthogonal**: Improvements are independent, can be validated separately
+6. **Low Risk**: Proven techniques (RecursiveCharacterTextSplitter, mxbai-embed-large-v1)
+7. **Maintainable**: Standard libraries and models, easy to debug
+
+### Negative
+
+1. **Reindexing Required**: 2-4 hours one-time cost (manageable, can run in background)
+2. **Storage Increase**: +30% for higher-dimensional embeddings (12 MB vs. 9 MB for 1K docs)
+3. **Slower Indexing**: +20% embedding time (50ms vs. 30ms per chunk)
+4. **Dependency**: Adds langchain-text-splitters (minimal, well-maintained library)
+5. **Not a Complete Solution**: May still need reranking/hybrid search for optimal recall (but solid foundation)
+
+### Neutral
+
+1. **Model Lock-In**: Committed to mxbai-embed-large-v1, but can change later (another reindex)
+2. **Chunk Size Trade-offs**: ~512 words is heuristic, may need tuning for specific content types
+
+## Monitoring & Success Metrics
+
+### Real-Time Metrics (Grafana)
+
+**Search Quality**:
+- `semantic_search_recall_at_10` (target: ≥75%)
+- `semantic_search_precision_at_10` (target: ≥75%)
+- `semantic_search_mrr` (target: ≥0.70)
+- `semantic_search_zero_result_rate` (target: ≤10%)
+
+**Performance**:
+- `semantic_search_latency_ms` (p50, p95, p99)
+- `embedding_generation_time_ms`
+- `indexing_throughput_docs_per_sec`
+
+**Indexing**:
+- `documents_indexed_total`
+- `documents_pending`
+- `indexing_errors_total`
+
+### Weekly Validation
+
+**A/B Testing** (if gradual rollout):
+- 50% users: New embeddings
+- 50% users: Old embeddings
+- Compare metrics for 1 week
+- Full rollout if new embeddings superior
+
+**User Feedback**:
+- Survey: "How satisfied are you with search results?" (1-5 scale)
+- Track: Number of "search not working" support tickets
+- Monitor: User-reported false negatives ("I know this doc exists")
+
+### Rollback Criteria
+
+**Automatic Rollback** if:
+- Recall decreases by >10% from baseline
+- Error rate increases by >50%
+- Query latency increases by >100%
+
+**Manual Rollback** if:
+- User complaints increase significantly
+- Zero-result queries increase instead of decrease
+
+## Future Enhancements
+
+These improvements create a solid foundation. Future enhancements (in order of priority):
+
+1. **Cross-Encoder Reranking** (ADR-012)
+   - Two-stage retrieval: broad recall (50 candidates) → precise reranking (top 10)
+   - Expected: +15-20% additional recall improvement
+   - Builds on: Better embeddings retrieve better candidates to rerank
+
+2. **Hybrid Search** (ADR-013)
+   - Combine vector search + BM25 keyword search
+   - Expected: +10-15% additional recall (especially for exact matches)
+   - Builds on: Semantic chunks provide better keyword match context
+
+3. **Multi-App Indexing** (ADR-014)
+   - Index calendar, deck, files (currently notes-only)
+   - Expected: Expands searchable corpus 3-5x
+   - Builds on: Proven chunking and embedding strategy
+
+4. **GraphRAG** (ADR-015, conditional)
+   - Only if: Global thematic queries needed OR corpus >10K documents
+   - Expected: Relationship discovery, multi-hop reasoning
+   - Builds on: High-quality embeddings improve graph construction
+
+## References
+
+### Research Papers
+
+1. **RecursiveCharacterTextSplitter**
+   - LangChain Documentation: https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter
+   - Proven technique used by major RAG systems
+
+2. **MTEB Leaderboard** (Massive Text Embedding Benchmark)
+   - https://huggingface.co/spaces/mteb/leaderboard
+   - Comprehensive embedding model comparison
+
+3. **mxbai-embed-large**
+   - Model: https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
+   - Best general-purpose embedding model (MTEB: 64.68)
+
+### Related ADRs
+
+- **ADR-003**: Vector Database and Semantic Search Architecture (original implementation)
+- **ADR-008**: MCP Sampling for Multi-App Semantic Search with RAG (answer generation)
+
+### Tools & Libraries
+
+- **LangChain Text Splitters**: https://python.langchain.com/docs/modules/data_connection/document_transformers/
+- **Ollama Embedding Models**: https://ollama.ai/library
+- **Qdrant Collections**: https://qdrant.tech/documentation/concepts/collections/
+
+## Summary
+
+This ADR addresses the root causes of poor semantic search recall:
+
+1. **Better Chunking**: Semantic sentence-aware splitting (preserves context)
+2. **Better Embeddings**: Upgrade to mxbai-embed-large-v1 (richer semantic space)
+
+**Expected Impact**: 40-60% recall improvement with minimal cost and complexity.
+
+**Why This Approach**:
+- Fixes fundamentals before adding complexity
+- Proven techniques (not experimental)
+- Simple implementation (1-2 weeks)
+- Creates foundation for future enhancements
+- No new infrastructure or ongoing costs
+
+**Next Steps**: Approve ADR → Implement changes → Reindex → Validate → Production rollout
+
+## Implementation Status
+
+### Completed (2025-11-18)
+
+**✅ Semantic Markdown-Aware Chunking (Option C1 + C3 Hybrid)**
+
+Implementation details:
+- Replaced custom word-based chunking with `MarkdownTextSplitter` from LangChain
+- Optimized for Nextcloud Notes markdown content with special handling for:
+  - Headers (`#`, `##`, `###`, etc.)
+  - Code blocks (` ``` `)
+  - Lists (`-`, `*`, `1.`)
+  - Horizontal rules (`---`)
+  - Paragraphs and sentences
+- Maintained `ChunkWithPosition` interface for backward compatibility
+- Updated configuration defaults:
+  - `DOCUMENT_CHUNK_SIZE`: 512 words → 2048 characters
+  - `DOCUMENT_CHUNK_OVERLAP`: 50 words → 200 characters
+- Updated unit tests to verify position tracking and boundary preservation
+- All tests passing with markdown-aware character-based chunking
+
+**Files Modified**:
+- `nextcloud_mcp_server/vector/document_chunker.py` - LangChain integration
+- `nextcloud_mcp_server/config.py` - Character-based defaults
+- `tests/unit/test_document_chunker.py` - Updated test suite
+
+**Dependencies Added**:
+- `langchain-text-splitters>=1.0.0` (already present in `pyproject.toml`)
+
+**Migration Required**:
+- ⚠️ Full reindex required to apply new chunking strategy
+- Existing documents in vector database use old word-based chunks
+- See "Migration Strategy" section above for reindexing process
+
+### Pending
+
+**⏳ Embedding Model Upgrade (Option E1)**
+
+Still to be implemented:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Implement dynamic dimension detection in `ollama_provider.py`
+- Create migration script for collection reindexing
+- Run benchmarking to validate improvement
+- Deploy to production with atomic collection swap
+
+**Estimated Timeline**: 1-2 weeks for implementation and validation
@@ -0,0 +1,619 @@
+# ADR-012: Unified Multi-Algorithm Search with Client-Configurable Weighting
+
+## Status
+Proposed
+
+## Context
+
+### Current State
+
+The Nextcloud MCP server currently provides semantic search via vector similarity (Qdrant), as designed in ADR-003 and implemented through ADR-007. However, users and MCP clients have limited control over search behavior:
+
+1. **Single algorithm only**: Only pure vector similarity search is available
+2. **No algorithm selection**: MCP clients cannot choose between semantic, keyword, or fuzzy approaches
+3. **No weighting control**: Clients cannot adjust the balance between different search methods
+4. **Disconnected implementations**: Viz pane uses different search algorithms than MCP tools
+5. **Limited flexibility**: No way to optimize search for different use cases (exact match vs. conceptual similarity)
+
+### User Needs
+
+Different search scenarios require different algorithms:
+
+- **Exact match queries**: "Find note titled 'Q1 Budget'" → keyword search preferred
+- **Conceptual queries**: "What are my goals for next quarter?" → semantic search preferred
+- **Typo-tolerant queries**: "Find note about kuberntes" → fuzzy search needed
+- **Balanced queries**: "Find documentation about API endpoints" → hybrid search optimal
+
+Additionally, users need a **testing interface** (viz pane) to:
+- Experiment with different search algorithms on their own documents
+- Visualize search results and algorithm behavior
+- Tune weights for optimal results
+- Understand which algorithm works best for their queries
+
+### Technical Requirements
+
+1. **Unified interface**: Single MCP tool supporting multiple algorithms
+2. **Client control**: MCP clients specify algorithm and weights via tool parameters
+3. **Backward compatibility**: Existing `nc_semantic_search()` behavior preserved
+4. **Shared implementation**: Viz pane and MCP tools use identical search algorithms
+5. **User accessibility**: Viz pane available to all logged-in users with vector sync enabled
+6. **Performance**: Minimal overhead for algorithm selection
+
+## Decision
+
+We will implement a **unified multi-algorithm search architecture** with the following components:
+
+### Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                         MCP Client / User Browser                            │
+│                                                                               │
+│  ┌──────────────────────────┐         ┌──────────────────────────────────┐  │
+│  │   MCP Tool Call          │         │   Viz Pane (Browser UI)          │  │
+│  │                          │         │                                  │  │
+│  │ nc_semantic_search(      │         │ - Algorithm selector dropdown    │  │
+│  │   query="kubernetes",    │         │ - Weight adjustment sliders      │  │
+│  │   algorithm="hybrid",    │         │ - Interactive 2D scatter plot    │  │
+│  │   semantic_weight=0.5,   │         │ - Side-by-side comparison        │  │
+│  │   keyword_weight=0.3,    │         │ - Real-time search testing       │  │
+│  │   fuzzy_weight=0.2       │         │                                  │  │
+│  │ )                        │         │                                  │  │
+│  └───────────┬──────────────┘         └────────────┬─────────────────────┘  │
+└──────────────┼─────────────────────────────────────┼────────────────────────┘
+               │                                      │
+               │ MCP Protocol                         │ HTTPS (htmx)
+               │                                      │
+┌──────────────▼──────────────────────────────────────▼────────────────────────┐
+│                        MCP Server (/app endpoint)                             │
+│                                                                               │
+│  ┌─────────────────────────────────────────────────────────────────────────┐ │
+│  │              Unified Search Interface (server/semantic.py)              │ │
+│  │                                                                         │ │
+│  │  @mcp.tool() nc_semantic_search(algorithm, weights...)                 │ │
+│  │  ├─ Validate parameters (weights sum ≤1.0)                             │ │
+│  │  ├─ Dispatch to algorithm selector                                     │ │
+│  │  └─ Return ranked SearchResponse                                       │ │
+│  └────────────────────────────┬────────────────────────────────────────────┘ │
+│                                │                                              │
+│  ┌────────────────────────────▼────────────────────────────────────────────┐ │
+│  │              Algorithm Dispatcher (search/algorithms.py)                │ │
+│  │                                                                         │ │
+│  │  if algorithm == "semantic":    → semantic.py                          │ │
+│  │  if algorithm == "keyword":     → keyword.py                           │ │
+│  │  if algorithm == "fuzzy":       → fuzzy.py                             │ │
+│  │  if algorithm == "hybrid":      → hybrid.py (RRF fusion)               │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+│                                                                               │
+│  ┌──────────────────┐  ┌──────────────────┐  ┌──────────────────┐           │
+│  │  semantic.py     │  │  keyword.py      │  │  fuzzy.py        │           │
+│  │                  │  │                  │  │                  │           │
+│  │ • Query Qdrant   │  │ • Token matching │  │ • Char overlap   │           │
+│  │ • Cosine dist    │  │ • Title weight   │  │ • 70% threshold  │           │
+│  │ • Score ≥0.7     │  │ • ADR-001 logic  │  │ • Simple impl    │           │
+│  └────────┬─────────┘  └────────┬─────────┘  └────────┬─────────┘           │
+│           │                     │                      │                     │
+│           └─────────────────────┼──────────────────────┘                     │
+│                                 │                                            │
+│  ┌──────────────────────────────▼──────────────────────────────────────────┐ │
+│  │                    hybrid.py (Reciprocal Rank Fusion)                   │ │
+│  │                                                                         │ │
+│  │  1. Run algorithms in parallel (semantic, keyword, fuzzy)              │ │
+│  │  2. Collect ranked results from each                                   │ │
+│  │  3. Apply RRF formula: score = weight / (k + rank)                     │ │
+│  │  4. Combine scores across algorithms                                   │ │
+│  │  5. Re-rank by combined score                                          │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+└───────────────────────────────────┬───────────────────────────────────────────┘
+                                    │
+                    ┌───────────────┴───────────────┐
+                    │                               │
+         ┌──────────▼──────────┐         ┌─────────▼────────────┐
+         │ Qdrant Vector DB    │         │ Nextcloud APIs       │
+         │                     │         │                      │
+         │ • Vector search     │         │ • Access verification│
+         │ • user_id filter    │         │ • Full metadata fetch│
+         │ • Score threshold   │         │ • Permission checks  │
+         │ • 768-dim embeddings│         │                      │
+         └─────────────────────┘         └──────────────────────┘
+```
+
+### Data Flow
+
+#### MCP Tool Request
+```
+1. Client calls nc_semantic_search(query, algorithm="hybrid", weights...)
+2. Server validates parameters (weights sum ≤1.0)
+3. Dispatcher routes to hybrid.py
+4. Hybrid search runs semantic, keyword, fuzzy in parallel
+5. RRF combines results with weighted scores
+6. Access verification via Nextcloud API
+7. Return ranked SearchResponse to client
+```
+
+#### Viz Pane Request (Server-Side Processing)
+```
+1. User navigates to /app (Vector Visualization tab)
+2. Browser loads vector-viz fragment via htmx
+3. User enters query and adjusts algorithm/weights
+4. htmx sends request to /app/vector-viz endpoint
+5. Server executes search via search/algorithms.py:
+   - Filters by user_id (multi-tenant security)
+   - Applies selected algorithm (semantic/keyword/fuzzy/hybrid)
+   - Filters by document type (notes/files/calendar/contacts)
+   - Retrieves matching results + metadata
+6. Server performs PCA reduction (768-dim → 2D):
+   - Converts matching results to 2D coordinates
+   - Only sends coordinates + metadata (not full vectors)
+   - Dramatically reduces bandwidth (e.g., 768 floats → 2 floats per doc)
+7. Server returns JSON: {results: [...], coordinates_2d: [...], stats: {...}}
+8. Browser receives lightweight response
+9. Plotly.js renders interactive scatter plot
+10. Matching results highlighted (blue), non-matches grayed (40% opacity)
+```
+
+**Performance Benefits of Server-Side Processing**:
+- **Bandwidth reduction**: ~384x less data (2 floats vs 768 floats per document)
+- **Client efficiency**: Browser only handles visualization, not computation
+- **Scalability**: Can visualize 10,000+ documents without client-side lag
+- **Security**: Raw vectors never leave server
+- **Consistency**: Same search logic as MCP tool (no drift)
+
+### 1. Core Search Algorithms
+
+Four search algorithms will be available:
+
+#### a) Semantic Search (Vector Similarity)
+- **Method**: Cosine distance in 768-dimensional embedding space
+- **Implementation**: Qdrant `query_points` with user_id filtering
+- **Use case**: Conceptual queries, finding related content
+- **Current status**: Implemented in `nextcloud_mcp_server/server/semantic.py`
+
+#### b) Keyword Search (Token-Based)
+- **Method**: Token matching with weighted scoring (from ADR-001)
+- **Implementation**: Title matches weighted 3x higher than content
+- **Use case**: Exact phrase matching, known titles
+- **Current status**: Designed in ADR-001, not implemented
+
+#### c) Fuzzy Search (Character Overlap)
+- **Method**: Simple character-based similarity (70% threshold)
+- **Implementation**: Character set comparison (current viz pane approach)
+- **Use case**: Typo tolerance, approximate matching
+- **Current status**: Implemented in viz pane only
+
+#### d) Hybrid Search (Multi-Algorithm Fusion)
+- **Method**: Reciprocal Rank Fusion (RRF) from ADR-003
+- **Implementation**: Parallel execution + score combination
+- **Use case**: Balanced queries, general-purpose search
+- **Current status**: Designed in ADR-003, not implemented
+
+### 2. Unified MCP Tool Interface
+
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(
+    query: str,
+    ctx: Context,
+    limit: int = 10,
+    score_threshold: float = 0.7,
+    algorithm: Literal["semantic", "keyword", "fuzzy", "hybrid"] = "hybrid",
+    semantic_weight: float = 0.5,
+    keyword_weight: float = 0.3,
+    fuzzy_weight: float = 0.2,
+) -> SearchResponse:
+    """
+    Search Nextcloud content using configurable algorithms.
+
+    Args:
+        query: Natural language search query
+        ctx: MCP context for authentication
+        limit: Maximum results to return
+        score_threshold: Minimum similarity score (semantic/hybrid only)
+        algorithm: Search algorithm to use
+        semantic_weight: Weight for semantic results (hybrid only, default: 0.5)
+        keyword_weight: Weight for keyword results (hybrid only, default: 0.3)
+        fuzzy_weight: Weight for fuzzy results (hybrid only, default: 0.2)
+
+    Returns:
+        Ranked search results with scores and excerpts
+    """
+```
+
+**Key decisions**:
+- **Single tool name**: Keep `nc_semantic_search` for backward compatibility
+- **Algorithm parameter**: Explicit selection via enum
+- **Weight parameters**: Client-configurable, only apply to hybrid mode
+- **Validation**: Weights must sum to ≤1.0, enforced server-side
+- **Defaults**: Hybrid mode with balanced weights (semantic 50%, keyword 30%, fuzzy 20%)
+
+### 3. Shared Algorithm Implementation
+
+Extract search algorithms into reusable module:
+
+```
+nextcloud_mcp_server/
+├── search/
+│   ├── __init__.py
+│   ├── algorithms.py          # Core search implementations
+│   ├── semantic.py             # Vector similarity search
+│   ├── keyword.py              # Token-based search (ADR-001)
+│   ├── fuzzy.py                # Character overlap search
+│   └── hybrid.py               # RRF fusion (ADR-003)
+└── server/
+    └── semantic.py             # MCP tool wrapper
+```
+
+**Benefits**:
+- Viz pane and MCP tools share identical implementations
+- Testable in isolation
+- Easy to add new algorithms (e.g., BM25, neural reranking)
+- Clear separation of concerns
+
+### 4. Viz Pane Integration
+
+Update viz pane (`nextcloud_mcp_server/auth/userinfo_routes.py`) to:
+
+1. **Use shared algorithms**: Import from `search/algorithms.py`
+2. **Server-side filtering**: All search and filtering operations happen server-side
+   - Query execution via shared search backend
+   - Document type filtering (notes, files, calendar, contacts)
+   - User ID filtering for multi-tenant security
+   - Only matching results + metadata sent to client
+   - Reduces bandwidth and improves performance
+3. **PCA reduction**: Server performs dimensionality reduction (768-dim → 2D)
+   - Only 2D coordinates sent to browser for visualization
+   - Dramatically reduces data transfer vs sending full vectors
+   - Enables visualization of large document collections
+4. **User accessibility**: Available to all users with vector sync enabled
+5. **Security**: Filter results by `user_id` (only show user's own documents)
+6. **Interactive testing**: Allow users to:
+   - Select algorithm type
+   - Adjust weights (hybrid mode)
+   - Compare results across algorithms
+   - Visualize result distribution in 2D space
+
+#### Viz Pane UI Components
+
+```
+┌────────────────────────────────────────────────────────────────────────┐
+│ Vector Visualization                                          [Status] │
+├────────────────────────────────────────────────────────────────────────┤
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Configuration                                             │  │
+│ │                                                                  │  │
+│ │ Query: [_______________________________________________] [Search]│  │
+│ │                                                                  │  │
+│ │ Algorithm: [Hybrid ▼]  [Semantic] [Keyword] [Fuzzy]             │  │
+│ │                                                                  │  │
+│ │ Weights (Hybrid Mode):                                           │  │
+│ │   Semantic: [========50========] 0.5                             │  │
+│ │   Keyword:  [======30======    ] 0.3                             │  │
+│ │   Fuzzy:    [====20====        ] 0.2                             │  │
+│ │                                                                  │  │
+│ │ Document Types: ☑ Notes  ☑ Files  ☑ Calendar  ☑ Contacts        │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Vector Space Visualization (PCA 2D Projection)                   │  │
+│ │                                                                  │  │
+│ │        ▲                                                         │  │
+│ │    PC2 │     ●  ● ●      🔵 Matching results (full opacity)     │  │
+│ │        │  ●     ●  ●     ⚪ Non-matching results (40% opacity)   │  │
+│ │        │    🔵  ● ●                                              │  │
+│ │        │  ●  🔵  ●       Hover: Show document title + excerpt    │  │
+│ │        │  ● ●  🔵 ●      Click: Open document in Nextcloud       │  │
+│ │    ────┼──●─🔵──●─●────► PC1                                     │  │
+│ │        │   ● ●  ●                                                │  │
+│ │        │    🔵 ●   ●     Explained Variance:                     │  │
+│ │        │  ●    ●  ●      PC1: 23.4% | PC2: 18.7%                 │  │
+│ │        │     ● ●                                                 │  │
+│ │                                                                  │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Results (12 matching documents)                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Kubernetes Setup Guide                        Score: 0.87     │  │
+│ │    "...configure kubectl to connect to cluster..."              │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Container Orchestration Notes                 Score: 0.82     │  │
+│ │    "...deployment strategies for kubernetes..."                 │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 K8s Troubleshooting                           Score: 0.79     │  │
+│ │    "...common kuberntes errors and solutions..."                │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ [Show More Results...]                                           │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Algorithm Performance Comparison                                 │  │
+│ │                                                                  │  │
+│ │ Algorithm    │ Results │ Avg Score │ Time (ms) │ Precision     │  │
+│ │ ─────────────┼─────────┼───────────┼───────────┼───────────     │  │
+│ │ Semantic     │   45    │   0.78    │   145ms   │  ████░ 0.82   │  │
+│ │ Keyword      │   23    │   0.91    │    42ms   │  ███░░ 0.67   │  │
+│ │ Fuzzy        │   67    │   0.72    │    89ms   │  ██░░░ 0.45   │  │
+│ │ Hybrid (RRF) │   52    │   0.84    │   198ms   │  █████ 0.89   │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+```
+
+**Key UI Features**:
+
+1. **Search Input**: Real-time query testing with instant visualization
+2. **Algorithm Selector**: Dropdown + quick-select buttons
+3. **Weight Sliders**: Visual adjustment with live preview (hybrid mode only)
+4. **Document Type Filters**: Checkboxes for notes, files, calendar, contacts
+5. **2D Scatter Plot**: Interactive Plotly.js visualization
+   - Blue dots = matching documents (full opacity)
+   - Gray dots = non-matching documents (40% opacity)
+   - Hover = show title + excerpt tooltip
+   - Click = open document in Nextcloud
+   - Zoom/pan controls for exploration
+6. **Results Panel**: Ranked list with scores and excerpts
+7. **Performance Table**: Compare algorithm speed and accuracy
+8. **Explained Variance**: Show how much information PCA preserves
+
+**Technology Stack**:
+- **Frontend**: htmx for dynamic loading, Alpine.js for reactivity
+- **Visualization**: Plotly.js for interactive scatter plots
+- **Styling**: Tailwind CSS (consistent with existing /app UI)
+- **Backend**: Shared `search/algorithms.py` implementation
+
+### 5. Reciprocal Rank Fusion (RRF) for Hybrid Search
+
+Following ADR-003's design:
+
+```python
+def reciprocal_rank_fusion(
+    results: dict[str, list[SearchResult]],
+    weights: dict[str, float],
+    k: int = 60
+) -> list[SearchResult]:
+    """
+    Combine multiple ranked result lists using RRF.
+
+    Args:
+        results: Dict of algorithm_name -> ranked results
+        weights: Dict of algorithm_name -> weight (0-1)
+        k: RRF constant (default: 60, standard value)
+
+    Returns:
+        Combined and re-ranked results
+    """
+    scores = defaultdict(float)
+
+    for algo_name, algo_results in results.items():
+        weight = weights.get(algo_name, 0.0)
+        for rank, result in enumerate(algo_results, start=1):
+            # RRF formula: 1 / (k + rank)
+            rrf_score = weight / (k + rank)
+            scores[result.doc_id] += rrf_score
+
+    # Sort by combined score, return top results
+    return sorted(scores.items(), key=lambda x: x[1], reverse=True)
+```
+
+**RRF properties**:
+- **Rank-based**: Uses position, not raw scores (handles score scale differences)
+- **Proven effective**: Standard approach in information retrieval
+- **Configurable**: `k` parameter controls rank decay (default: 60)
+- **Weight support**: Allows algorithm-specific importance
+
+## Implementation Plan
+
+### Phase 1: Extract and Unify Algorithms (Week 1)
+
+1. Create `nextcloud_mcp_server/search/` module
+2. Implement `algorithms.py` with base interface
+3. Extract semantic search logic from `server/semantic.py`
+4. Implement keyword search from ADR-001 design
+5. Extract fuzzy search from viz pane
+6. Implement RRF hybrid search from ADR-003
+7. Add comprehensive unit tests for each algorithm
+
+### Phase 2: Update MCP Tool (Week 1-2)
+
+1. Add `algorithm` parameter to `nc_semantic_search()`
+2. Add weight parameters (`semantic_weight`, etc.)
+3. Implement algorithm dispatcher
+4. Add parameter validation (weights sum ≤1.0)
+5. Update response model to include algorithm metadata
+6. Maintain backward compatibility (default: hybrid)
+7. Add integration tests for all algorithm modes
+
+### Phase 3: Update Viz Pane (Week 2)
+
+**Critical: All processing must happen server-side**
+
+1. **Remove client-side search filtering**
+   - Delete JavaScript-based keyword/fuzzy matching
+   - Remove client-side document type filtering
+   - No search logic in browser
+2. **Implement server-side endpoint** (`/app/vector-viz`)
+   - Accept query, algorithm, weights, doc_type filters
+   - Execute search via `search/algorithms.py`
+   - Filter results by user_id (security)
+   - Perform PCA reduction (768-dim → 2D)
+   - Return JSON with 2D coordinates + metadata only
+3. **Update frontend**
+   - htmx form submission to `/app/vector-viz`
+   - Algorithm selector dropdown
+   - Weight adjustment sliders (htmx updates on change)
+   - Document type checkboxes
+   - Plotly.js visualization of server response
+4. **Performance optimization**
+   - Limit results to user's documents only
+   - Cache PCA transformation (invalidate on new vectors)
+   - Stream large result sets if needed
+   - Add loading indicators for server processing
+
+### Phase 4: Documentation and Testing (Week 2-3)
+
+1. Update MCP tool documentation
+2. Add algorithm selection guide
+3. Document weight tuning recommendations
+4. Add end-to-end tests (MCP + viz pane)
+5. Performance benchmarks for each algorithm
+6. Update CLAUDE.md with search patterns
+
+## Consequences
+
+### Positive
+
+1. **Flexibility**: MCP clients can optimize search for their use case
+2. **Unified implementation**: Single source of truth for search algorithms
+3. **User empowerment**: Viz pane enables query testing and tuning
+4. **Backward compatible**: Existing semantic search behavior preserved
+5. **Extensible**: Easy to add new algorithms (BM25, neural reranking)
+6. **Testable**: Each algorithm can be unit tested independently
+7. **Standards-based**: RRF is proven in production systems
+
+### Negative
+
+1. **Complexity**: More parameters for clients to understand
+2. **API surface**: Larger tool signature (8 parameters)
+3. **Performance**: Hybrid search requires multiple queries
+4. **Validation overhead**: Weight validation adds processing
+5. **Documentation burden**: Need to explain when to use each algorithm
+
+### Neutral
+
+1. **Weight defaults**: May need tuning based on user feedback
+2. **Algorithm performance**: Will vary by content type and query
+3. **Viz pane adoption**: Unknown if users will utilize testing interface
+
+## Alternatives Considered
+
+### Alternative 1: Separate Tools Per Algorithm
+
+```python
+@mcp.tool()
+async def nc_semantic_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure vector similarity search."""
+
+@mcp.tool()
+async def nc_keyword_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure keyword matching."""
+
+@mcp.tool()
+async def nc_hybrid_search(query: str, ctx: Context, weights: dict, ...) -> SearchResponse:
+    """Hybrid search with weights."""
+```
+
+**Rejected because**:
+- API proliferation (3+ tools instead of 1)
+- Harder to discover capabilities
+- Backward compatibility issues
+- DRY violation (repeated parameters)
+
+### Alternative 2: Server-Wide Configuration Only
+
+```python
+# .env configuration
+SEARCH_ALGORITHM=hybrid
+SEMANTIC_WEIGHT=0.5
+KEYWORD_WEIGHT=0.3
+FUZZY_WEIGHT=0.2
+```
+
+**Rejected because**:
+- No per-query flexibility
+- MCP clients cannot optimize for different tasks
+- Requires server restart for changes
+- User's requirement: "expose a way for users to override the default weights"
+
+### Alternative 3: Production-Grade Fuzzy (Levenshtein/RapidFuzz)
+
+**Rejected because**:
+- Adds external dependency
+- Simple character overlap performs adequately
+- Can always upgrade later if needed
+- User's preference: "Keep simple character overlap"
+
+## Related ADRs
+
+- **ADR-001**: Enhanced Note Search (keyword algorithm design)
+- **ADR-003**: Vector Database and Semantic Search (hybrid search + RRF design)
+- **ADR-007**: Background Vector Sync (semantic search implementation)
+- **ADR-008**: MCP Sampling for RAG (uses semantic search results)
+- **ADR-009**: Semantic Search OAuth Scope (security model)
+- **ADR-011**: Improving Semantic Search Quality (mentions future "ADR-013" for hybrid search)
+
+**This ADR supersedes**:
+- ADR-011's placeholder for "ADR-013: Hybrid Search"
+
+**This ADR implements**:
+- ADR-003's hybrid search design (previously unimplemented)
+- ADR-001's keyword search design (previously unimplemented)
+
+## References
+
+- **Reciprocal Rank Fusion**: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). "Reciprocal rank fusion outperforms condorcet and individual rank learning methods." SIGIR '09.
+- **Vector Search**: Malkov, Y. A., & Yashunin, D. A. (2018). "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs." TPAMI.
+- **Hybrid Search Best Practices**: Qdrant documentation on hybrid search patterns
+- **MCP Protocol**: Model Context Protocol specification for tool design
+
+## Implementation Notes
+
+### Weight Validation
+
+```python
+def validate_weights(
+    semantic_weight: float,
+    keyword_weight: float,
+    fuzzy_weight: float
+) -> None:
+    """Validate hybrid search weights."""
+    if semantic_weight < 0 or keyword_weight < 0 or fuzzy_weight < 0:
+        raise ValueError("Weights must be non-negative")
+
+    total = semantic_weight + keyword_weight + fuzzy_weight
+    if total > 1.0:
+        raise ValueError(f"Weights sum to {total:.2f}, must be ≤1.0")
+
+    if total == 0.0:
+        raise ValueError("At least one weight must be > 0")
+```
+
+### Backward Compatibility
+
+The default behavior (`algorithm="hybrid"` with balanced weights) provides better results than current pure semantic search, while maintaining the same tool name and signature structure. Existing clients will automatically benefit from hybrid search without code changes.
+
+### Performance Considerations
+
+- **Semantic search**: ~50-200ms (vector DB query)
+- **Keyword search**: ~10-50ms (in-memory token matching)
+- **Fuzzy search**: ~20-100ms (character comparison)
+- **Hybrid search**: ~100-300ms (parallel execution + fusion)
+
+Parallel execution of algorithms minimizes hybrid search latency.
+
+### Security Model
+
+All algorithms respect the same security boundaries:
+1. **User filtering**: Qdrant queries filter by `user_id`
+2. **Access verification**: Results verified via Nextcloud API
+3. **OAuth scope**: `semantic:read` required for all algorithms
+4. **Viz pane**: Shows only current user's documents
+
+## Success Metrics
+
+1. **Adoption**: % of MCP clients using algorithm parameter
+2. **Performance**: Search latency percentiles (p50, p95, p99)
+3. **Quality**: User satisfaction with result relevance
+4. **Viz pane usage**: % of users accessing testing interface
+5. **Weight distribution**: Most common weight configurations
+
+## Future Enhancements
+
+1. **Additional algorithms**: BM25, TF-IDF, neural reranking
+2. **Auto-tuning**: Learn optimal weights per user
+3. **Query analysis**: Automatic algorithm selection based on query
+4. **Cross-app search**: Extend beyond notes to calendar, files, etc.
+5. **Feedback loop**: Use click-through rate to improve weights
@@ -0,0 +1,254 @@
+## ADR-013: RAG Evaluation Testing Framework
+
+**Status:** Proposed
+
+**Date:** 2025-11-15
+
+### Context
+
+The `nc_semantic_search_answer` tool implements a Retrieval-Augmented Generation (RAG) system where:
+1. **Retrieval**: Vector sync pipeline indexes Nextcloud documents (notes, calendar, contacts, etc.) into a vector database
+2. **Generation**: MCP client's LLM synthesizes answers from retrieved documents via MCP sampling (ADR-008)
+
+We need a testing framework to evaluate RAG system performance and identify whether failures occur in retrieval (wrong documents found) or generation (poor answer quality). This framework must use industry-standard evaluation methodologies while remaining practical to implement and maintain.
+
+To establish a baseline, we will use the **BeIR/nfcorpus** dataset (medical/biomedical corpus) with ~5,000 documents and established query/answer pairs.
+
+Homepage: https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/
+Download: https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nfcorpus.zip
+
+### Decision
+
+We will implement a **two-part evaluation framework** that independently tests retrieval and generation quality using pytest fixtures.
+
+#### In Scope
+
+**1. Retrieval Evaluation**
+Tests the vector sync/embedding pipeline's ability to find relevant documents.
+
+- **Metric: Context Recall** (Did we retrieve documents containing the answer?)
+  - **Evaluation method**: Heuristic - Check if ground-truth document IDs appear in top-k retrieval results
+  - **Test**: Query → Semantic search → Assert expected doc IDs present
+
+**2. Generation Evaluation**
+Tests the MCP client LLM's ability to synthesize correct answers from retrieved context.
+
+- **Metric: Answer Correctness** (Is the generated answer factually correct?)
+  - **Evaluation method**: LLM-as-judge - Compare RAG answer against ground-truth answer
+  - **Test**: Query → `nc_semantic_search_answer` → LLM evaluates answer vs. ground truth (binary true/false)
+
+#### Out of Scope (Initial Implementation)
+
+- **Context Relevance/Precision**: Measuring irrelevant documents in retrieval results
+- **Faithfulness/Groundedness**: Detecting hallucinations not supported by retrieved context
+- **Answer Relevance**: Whether answer addresses the specific question asked
+- **Out-of-Scope Handling**: Testing "I don't know" responses when answer isn't in context
+- **Continuous benchmarking**: Automated tracking of metric trends over time
+- **Custom domain datasets**: Production-specific test data (medical corpus used initially)
+
+These remain valuable for future iterations but add complexity beyond our initial goals.
+
+#### Implementation
+
+**Test Structure**
+
+Location: `tests/rag_evaluation/`
+- `test_retrieval_quality.py` - Retrieval evaluation tests
+- `test_generation_quality.py` - Generation evaluation tests
+- `conftest.py` - Fixtures for test data, MCP clients, and evaluation LLMs
+
+**Required Pytest Fixtures**
+
+1. **`nfcorpus_test_data`** (session-scoped)
+   - Downloads/caches BeIR nfcorpus dataset at runtime
+   - Loads 5 pre-selected test queries with:
+     - Query text
+     - Pre-generated ground-truth answer (from `tests/rag_evaluation/fixtures/ground_truth.json`)
+     - Expected document IDs (from qrels with score=2)
+   - Uploads all corpus documents as notes in test Nextcloud instance
+   - Triggers vector sync to index documents
+   - Waits for indexing completion
+   - Returns test case data structure
+
+2. **`mcp_sampling_client`** (session-scoped)
+   - Creates MCP client that supports sampling
+   - Configurable LLM provider (ollama or anthropic) via environment:
+     - `RAG_EVAL_PROVIDER=ollama` (default) or `anthropic`
+     - `RAG_EVAL_OLLAMA_BASE_URL=http://localhost:11434`
+     - `RAG_EVAL_OLLAMA_MODEL=llama3.1:8b`
+     - `RAG_EVAL_ANTHROPIC_API_KEY=sk-...`
+     - `RAG_EVAL_ANTHROPIC_MODEL=claude-3-5-sonnet-20241022`
+   - Returns configured MCP client fixture
+
+3. **`evaluation_llm`** (session-scoped)
+   - Separate LLM instance for evaluation (independent from MCP client)
+   - Same provider configuration as `mcp_sampling_client`
+   - Returns callable: `async def evaluate(prompt: str) -> str`
+
+**Test Implementation Examples**
+
+```python
+# tests/rag_evaluation/test_retrieval_quality.py
+async def test_retrieval_recall(nc_client, nfcorpus_test_data):
+    """Test that semantic search retrieves documents containing the answer."""
+    for test_case in nfcorpus_test_data:
+        # Perform semantic search (retrieval only, no generation)
+        results = await nc_client.notes.semantic_search(
+            query=test_case.query,
+            limit=10
+        )
+
+        retrieved_doc_ids = {r.document_id for r in results}
+        expected_doc_ids = set(test_case.expected_document_ids)
+
+        # Context Recall: Are expected documents in top-k results?
+        recall = len(expected_doc_ids & retrieved_doc_ids) / len(expected_doc_ids)
+        assert recall >= 0.8, f"Recall {recall} below threshold for query: {test_case.query}"
+
+
+# tests/rag_evaluation/test_generation_quality.py
+async def test_answer_correctness(mcp_sampling_client, evaluation_llm, nfcorpus_test_data):
+    """Test that RAG system generates factually correct answers."""
+    for test_case in nfcorpus_test_data:
+        # Execute full RAG pipeline (retrieval + generation)
+        result = await mcp_sampling_client.call_tool(
+            "nc_semantic_search_answer",
+            arguments={"query": test_case.query, "limit": 5}
+        )
+
+        rag_answer = result["generated_answer"]
+
+        # LLM-as-judge evaluation
+        evaluation_prompt = f"""Compare these two answers and respond with only TRUE or FALSE.
+
+Question: {test_case.query}
+
+Generated Answer: {rag_answer}
+
+Ground Truth Answer: {test_case.ground_truth}
+
+Are these answers semantically equivalent (do they convey the same factual information)?
+Respond with only: TRUE or FALSE"""
+
+        evaluation_result = await evaluation_llm(evaluation_prompt)
+
+        assert evaluation_result.strip().upper() == "TRUE", \
+            f"Answer mismatch for query: {test_case.query}\nGot: {rag_answer}\nExpected: {test_case.ground_truth}"
+```
+
+**Dataset Integration**
+
+The BeIR nfcorpus dataset structure:
+- **corpus.jsonl**: 3,633 medical/biomedical documents (articles from PubMed)
+- **queries.jsonl**: 3,237 queries (questions)
+- **qrels/*.tsv**: Relevance judgments mapping query IDs to document IDs with scores (2=highly relevant, 1=somewhat relevant)
+
+**Important**: The dataset provides relevance judgments (which documents answer which queries) but does NOT include ground truth answers. We must generate synthetic ground truth offline.
+
+**Selected Test Queries** (5 diverse candidates):
+
+1. **PLAIN-2630**: "Alkylphenol Endocrine Disruptors and Allergies" (5 words, 21 highly relevant docs)
+2. **PLAIN-2660**: "How Long to Detox From Fish Before Pregnancy?" (8 words, 20 highly relevant docs)
+3. **PLAIN-2510**: "Coffee and Artery Function" (4 words, 16 highly relevant docs)
+4. **PLAIN-2430**: "Preventing Brain Loss with B Vitamins?" (6 words, 15 highly relevant docs)
+5. **PLAIN-2690**: "Chronic Headaches and Pork Tapeworms" (5 words, 14 highly relevant docs)
+
+**Ground Truth Generation** (offline, pre-test):
+
+Ground truth answers will be generated offline using a script that:
+1. Loads nfcorpus dataset
+2. For each selected query, extracts top 3-5 highly relevant documents
+3. Uses an LLM (ollama/anthropic) to synthesize a reference answer
+4. Stores ground truth in `tests/rag_evaluation/fixtures/ground_truth.json`
+
+```python
+# tools/generate_rag_ground_truth.py
+async def generate_ground_truth(query: str, relevant_docs: List[dict], llm: LLMProvider) -> str:
+    """Generate synthetic ground truth answer from highly relevant documents."""
+    context = "\n\n".join([
+        f"Document {i+1}:\nTitle: {doc['title']}\n{doc['text']}"
+        for i, doc in enumerate(relevant_docs[:5])
+    ])
+
+    prompt = f"""Based on the following documents, provide a comprehensive answer to this question:
+
+Question: {query}
+
+{context}
+
+Provide a factual, well-structured answer that synthesizes information from the documents.
+Focus on accuracy and completeness."""
+
+    return await llm.generate(prompt, max_tokens=500)
+```
+
+**Dataset Loading at Test Runtime** (in `nfcorpus_test_data` fixture):
+
+1. Download nfcorpus dataset (cached in pytest temp directory)
+2. Load corpus, queries, and qrels (relevance judgments)
+3. Load pre-generated ground truth from `tests/rag_evaluation/fixtures/ground_truth.json`
+4. Upload all corpus documents as Nextcloud notes
+5. Trigger vector sync to index documents
+6. Wait for indexing completion
+7. Return test cases with query, ground truth, and expected doc IDs
+
+**LLM Provider Abstraction**
+
+```python
+# tests/rag_evaluation/llm_providers.py
+class LLMProvider(Protocol):
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str: ...
+
+class OllamaProvider:
+    def __init__(self, base_url: str, model: str):
+        self.base_url = base_url
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        # Use httpx to call Ollama API
+        ...
+
+class AnthropicProvider:
+    def __init__(self, api_key: str, model: str):
+        self.client = anthropic.AsyncAnthropic(api_key=api_key)
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        message = await self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return message.content[0].text
+```
+
+### Consequences
+
+**Positive:**
+
+* **Actionable debugging**: Separate retrieval/generation tests pinpoint failure location
+* **Industry-standard metrics**: Context Recall and Answer Correctness are recognized RAG evaluation metrics
+* **Simple initial implementation**: Binary LLM evaluation (true/false) is straightforward to implement and interpret
+* **Extensible framework**: Easy to add more metrics (faithfulness, relevance) later
+* **Standardized benchmark**: nfcorpus provides objective comparison against published RAG systems
+* **Hybrid evaluation**: Combines efficiency (heuristics for retrieval) with quality (LLM-as-judge for generation)
+* **Provider flexibility**: Supports both local (Ollama) and cloud (Anthropic) LLM evaluation
+
+**Negative:**
+
+* **Medical domain bias**: nfcorpus is medical/biomedical content, may not represent production use cases (personal notes, calendar events, etc.)
+* **Manual test execution**: Tests require external LLM access and are not integrated into CI pipeline
+* **Limited initial coverage**: Starting with only 5 queries provides limited statistical confidence
+* **Evaluation cost**: LLM-as-judge for generation evaluation incurs API costs (Anthropic) or requires local inference (Ollama)
+* **Single metric per component**: Initial scope tests only one metric per component, missing other important quality dimensions
+* **Synthetic ground truth**: Ground truth answers are LLM-generated, not human-validated, which may introduce evaluation bias
+* **Large corpus upload**: Uploading 3,633 documents at test runtime may be slow; caching strategy needed
+
+**Future Work:**
+
+* Expand to 50-100 queries for statistical significance
+* Add custom test dataset with production-representative documents (meeting notes, task lists, etc.)
+* Implement additional metrics (faithfulness, context relevance, answer relevance)
+* Create automated benchmarking dashboard to track metric trends
+* Test multi-hop reasoning (synthesis questions requiring multiple documents)
+* Evaluate out-of-scope handling ("I don't know" responses)
@@ -0,0 +1,241 @@
+# ADR-014: Replace Custom Keyword Search with BM25 Hybrid Search via Qdrant
+
+**Date:** 2025-11-16
+
+**Status:** Implemented
+
+---
+
+### 1. Context
+
+Our RAG application currently employs two separate retrieval mechanisms:
+1.  **Dense (Semantic) Search:** Using vector embeddings stored in our Qdrant database to find semantically similar context.
+2.  **Keyword Search:** A custom-built fuzzy/character-based search to match-specific keywords, acronyms, and product codes that semantic search often misses.
+
+This dual-system approach has several drawbacks:
+* **Poor Relevance:** Our current keyword search is basic (e.g., `LIKE` queries or simple fuzzy matching). It is not as effective as modern full-text search algorithms like BM25.
+* **Clunky Fusion:** We lack a robust, principled method to combine the results from the two systems. This leads to disjointed logic in the application layer and suboptimal context being passed to the LLM.
+* **Architectural Complexity:** We must maintain two separate search pathways (one to Qdrant, one to the keyword search mechanism), increasing code complexity and maintenance overhead.
+
+Our vector database, **Qdrant**, natively supports **hybrid search** by combining dense vectors with BM25-based **sparse vectors** in a single collection.
+
+### 2. Decision
+
+We will **deprecate and remove** the existing custom keyword/fuzzy search functionality.
+
+We will **replace it by implementing native hybrid search within Qdrant**. This involves:
+1.  **Modifying the Qdrant Collection:** Updating our collection to support a named sparse vector index configured for BM25.
+2.  **Updating the Ingestion Pipeline:** For every document chunk, we will generate and upsert *both*:
+    * Its **dense vector** (from our existing embedding model).
+    * Its **sparse vector** (generated using a BM25-compatible model, e.g., `Qdrant/bm25` from `fastembed`).
+3.  **Refactoring Retrieval Logic:** All retrieval calls will be consolidated into a single Qdrant query using the `query_points` endpoint. This query will use the `prefetch` parameter to execute both dense and sparse searches, and Qdrant's built-in **Reciprocal Rank Fusion (RRF)** to automatically merge the results into a single, relevance-ranked list.
+4.  **Backfilling:** A one-time migration script will be created to generate and add sparse vectors for all existing documents in the Qdrant collection.
+
+---
+
+### 3. Considered Options
+
+#### Option 1: Native Qdrant Hybrid Search (Chosen)
+* Use Qdrant's built-in sparse vector and RRF capabilities.
+* **Pros:**
+    * **Consolidated Architecture:** Manages both dense and sparse indexes in one database.
+    * **No Data Sync Issues:** Updates are atomic. A single `upsert` updates both representations.
+    * **Built-in Fusion:** RRF is handled natively and efficiently by the database.
+    * **Superior Relevance:** Replaces our brittle custom search with the industry-standard BM25.
+* **Cons:**
+    * Requires a one-time data backfill which may be time-consuming.
+    * Adds a new step (sparse vector generation) to the ingestion pipeline.
+
+#### Option 2: External Full-Text Search (e.g., Elasticsearch)
+* Keep Qdrant for dense search and add a separate Elasticsearch/OpenSearch cluster for BM25.
+* **Pros:**
+    * Provides a very powerful, dedicated full-text search engine.
+* **Cons:**
+    * **High Complexity:** Introduces a new, stateful service to deploy, manage, and scale.
+    * **Data Sync Nightmare:** We would be responsible for ensuring that the document IDs and content in Qdrant and Elasticsearch are always perfectly synchronized. This is a major source of bugs.
+    * **Manual Fusion:** The application would have to query both systems and perform RRF manually.
+
+#### Option 3: Keep Current System
+* Make no changes.
+* **Pros:**
+    * No engineering effort required.
+* **Cons:**
+    * Fails to address the known relevance and architectural problems.
+    * Our RAG application's performance will remain suboptimal, especially for keyword-sensitive queries.
+
+---
+
+### 4. Rationale
+
+**Option 1 is the clear winner.** It directly solves our primary problem (poor keyword matching) by adopting the industry-standard BM25.
+
+Critically, it achieves this while **simplifying** our overall architecture, not complicating it. By leveraging features already present in our existing database (Qdrant), we avoid the massive operational and synchronization overhead of adding a second search system (Option 2).
+
+This decision consolidates our retrieval logic, eliminates the data consistency problem, and moves the complex fusion logic (RRF) from the application layer into the database, where it can be performed more efficiently.
+
+### 5. Consequences
+
+**New Work:**
+* **Ingestion:** The data ingestion pipeline must be updated to add the `fastembed` library (or similar), generate sparse vectors, and upsert them to the new named vector field in Qdrant.
+* **Retrieval:** The application's retrieval service must be refactored to use the `query_points` endpoint with `prefetch` and `fusion=models.Fusion.RRF`.
+* **Migration:** A one-time backfill script must be written and executed to add sparse vectors for all existing documents.
+* **Infrastructure:** The Qdrant collection schema must be updated (or re-created) to add the `sparse_vectors_config`.
+
+**Positive:**
+* **Improved Accuracy:** Retrieval will be significantly more accurate, handling both semantic and keyword queries robustly.
+* **Simplified Code:** The application's retrieval logic will be cleaner and simpler, with one endpoint instead of two.
+* **Reduced Maintenance:** We will remove the custom fuzzy-search code, which is brittle and difficult to maintain.
+
+**Negative:**
+* The data backfill process will require careful management to avoid downtime.
+* Ingestion time will slightly increase due to the extra step of sparse vector generation. This is considered a negligible trade-off for the gains in relevance.
+
+---
+
+### 6. Implementation Notes
+
+**Implementation completed on 2025-11-16**
+
+**Key Changes:**
+
+1. **Dependencies** (pyproject.toml:25):
+   - Added `fastembed>=0.4.2` for BM25 sparse vector embeddings
+   - Adjusted `pillow` version constraint to be compatible with fastembed
+
+2. **Qdrant Collection Schema** (nextcloud_mcp_server/vector/qdrant_client.py:113-128):
+   - Updated to named vectors: `{"dense": VectorParams(...), "sparse": SparseVectorParams(...)}`
+   - Added sparse vector configuration with BM25 index
+   - Maintains backward compatibility with existing collections (detects legacy schema)
+
+3. **BM25 Embedding Provider** (nextcloud_mcp_server/embedding/bm25_provider.py):
+   - Created `BM25SparseEmbeddingProvider` using FastEmbed's `Qdrant/bm25` model
+   - Implements `encode()` and `encode_batch()` methods
+   - Returns sparse vectors as `{indices: list[int], values: list[float]}` format
+
+4. **Document Indexing Pipeline** (nextcloud_mcp_server/vector/processor.py:229-255):
+   - Generates both dense (semantic) and sparse (BM25) embeddings for each document chunk
+   - Updates `PointStruct` to use named vectors: `vector={"dense": ..., "sparse": ...}`
+   - Maintains same chunking strategy (512 words, 50-word overlap)
+
+5. **BM25 Hybrid Search Algorithm** (nextcloud_mcp_server/search/bm25_hybrid.py):
+   - Implements `BM25HybridSearchAlgorithm` using Qdrant's native RRF fusion
+   - Uses `prefetch` parameter for parallel dense + sparse search
+   - Applies `fusion=models.Fusion.RRF` for automatic result merging
+   - Maintains same deduplication and filtering logic as semantic search
+
+6. **MCP Tool Updates** (nextcloud_mcp_server/server/semantic.py:39-68):
+   - Simplified `nc_semantic_search()` to use BM25 hybrid only
+   - Removed `algorithm`, `semantic_weight`, `keyword_weight`, `fuzzy_weight` parameters
+   - Updated default `score_threshold=0.0` for RRF scoring
+   - Returns `search_method="bm25_hybrid"` in responses
+
+7. **Legacy Algorithm Removal**:
+   - Deleted `nextcloud_mcp_server/search/keyword.py` (278 lines)
+   - Deleted `nextcloud_mcp_server/search/fuzzy.py` (220 lines)
+   - Deleted `nextcloud_mcp_server/search/hybrid.py` (238 lines - custom RRF)
+   - Updated `nextcloud_mcp_server/search/__init__.py` to export only BM25 hybrid
+
+**Migration Strategy:**
+- No migration required (vector sync feature is experimental)
+- New documents automatically indexed with both dense + sparse vectors
+- Collection re-creation on first startup with updated schema
+
+**Test Results:**
+- All unit tests passing (118 passed)
+- All integration tests passing (7 semantic search tests)
+- Code formatting verified with ruff
+
+**Benefits Realized:**
+- ✅ Consolidated architecture (single Qdrant database for both dense + sparse)
+- ✅ Native fusion algorithms (database-level, more efficient)
+- ✅ Industry-standard BM25 (replaces custom keyword search)
+- ✅ Simplified codebase (removed 736 lines of legacy code)
+- ✅ Better relevance (handles both semantic and keyword queries)
+- ✅ Configurable fusion methods (RRF and DBSF)
+
+---
+
+### 7. Fusion Algorithm Options
+
+**Update: 2025-11-16**
+
+The BM25 hybrid search now supports two fusion algorithms for combining dense (semantic) and sparse (BM25) search results:
+
+#### Reciprocal Rank Fusion (RRF)
+
+**Default fusion method.** RRF is a widely-used, well-established algorithm that combines rankings from multiple retrieval systems using the reciprocal rank formula:
+
+```
+RRF(doc) = Σ 1/(k + rank_i(doc))
+```
+
+where `k` is a constant (typically 60) and `rank_i(doc)` is the rank of the document in retrieval system `i`.
+
+**Characteristics:**
+- ✅ **General-purpose**: Works well across diverse query types and document collections
+- ✅ **Rank-based**: Focuses on relative rankings rather than absolute scores
+- ✅ **Established**: Well-tested, documented, and understood in IR literature
+- ✅ **Robust**: Less sensitive to score distribution differences between systems
+
+**When to use RRF:**
+- Default choice for most use cases
+- When you have mixed query types (semantic + keyword)
+- When retrieval systems have very different score ranges
+- When you want predictable, well-understood behavior
+
+#### Distribution-Based Score Fusion (DBSF)
+
+**Alternative fusion method.** DBSF normalizes scores from each retrieval system using distribution statistics before combining them:
+
+1. **Normalization**: For each query, calculates mean (μ) and standard deviation (σ) of scores
+2. **Outlier handling**: Uses μ ± 3σ as normalization bounds
+3. **Fusion**: Sums normalized scores across systems
+
+**Characteristics:**
+- ✅ **Score-aware**: Uses actual relevance scores, not just rankings
+- ✅ **Statistical**: Normalizes based on score distribution properties
+- ⚠️ **Experimental**: Newer algorithm, less battle-tested than RRF
+- ⚠️ **Sensitive**: May behave differently depending on score distributions
+
+**When to use DBSF:**
+- When retrieval systems have vastly different score ranges that RRF doesn't balance well
+- When you want to experiment with score-based (vs rank-based) fusion
+- When statistical normalization better matches your use case
+- For A/B testing against RRF to measure retrieval quality improvements
+
+#### Configuration
+
+Both fusion algorithms are exposed via the `fusion` parameter in MCP tools:
+
+```python
+# Use RRF (default)
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf"  # Can be omitted, RRF is default
+)
+
+# Use DBSF
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="dbsf"
+)
+```
+
+The `nc_semantic_search_answer` tool also supports the `fusion` parameter and passes it through to the underlying search.
+
+#### Future: Configurable Weights
+
+**Current limitation**: Neither RRF nor DBSF currently support per-system weights (e.g., 0.8 for semantic, 0.2 for BM25). This is a Qdrant platform limitation tracked in [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067).
+
+When Qdrant adds weight support, the `fusion` parameter can be extended to accept weight configurations:
+
+```python
+# Hypothetical future API
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf",
+    fusion_weights={"dense": 0.7, "sparse": 0.3}  # Not yet implemented
+)
+```
+
+**Recommendation**: Start with RRF (default). If you encounter cases where keyword matches are under- or over-weighted, experiment with DBSF. Monitor [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067) for configurable weight support.
@@ -0,0 +1,380 @@
+# ADR-015: Unified Provider Architecture for Embeddings and Text Generation
+
+**Status:** Accepted
+**Date:** 2025-01-16
+**Deciders:** Development Team
+**Related:** ADR-003 (Vector Database), ADR-008 (MCP Sampling), ADR-013 (RAG Evaluation)
+
+## Context
+
+Prior to this refactoring, the codebase had two separate provider systems:
+
+1. **Embedding Providers** (`nextcloud_mcp_server/embedding/`)
+   - Used `EmbeddingProvider` ABC with methods: `embed()`, `embed_batch()`, `get_dimension()`
+   - Had auto-detection via `EmbeddingService._detect_provider()`
+   - Used for semantic search and vector indexing (production)
+
+2. **LLM Providers** (`tests/rag_evaluation/llm_providers.py`)
+   - Used `LLMProvider` Protocol with method: `generate()`
+   - Had separate factory function `create_llm_provider()`
+   - Used only for RAG evaluation tests (not production)
+
+This fragmentation created several problems:
+
+### Problems with Dual Provider Systems
+
+1. **Code Duplication**
+   - Ollama configuration appeared in both `embedding/service.py` and `tests/rag_evaluation/llm_providers.py`
+   - Similar provider detection logic in multiple places
+   - Separate singleton patterns for each system
+
+2. **Limited Extensibility**
+   - Hard-coded provider detection in `EmbeddingService._detect_provider()`
+   - No support for providers that offer both capabilities (like Bedrock)
+   - Adding new providers required modifying multiple files
+
+3. **Inconsistent Patterns**
+   - BM25 provider didn't follow `EmbeddingProvider` ABC
+   - Different method names across providers (`embed` vs `encode`)
+   - ABC vs Protocol for type checking
+
+4. **Difficult Scaling**
+   - Adding Amazon Bedrock (our third provider) would exacerbate all issues
+   - No clear path for future providers (OpenAI, Cohere, etc.)
+
+### Amazon Bedrock Requirements
+
+Bedrock naturally supports **both** embeddings and text generation:
+- **Embeddings**: `amazon.titan-embed-text-v1/v2`, `cohere.embed-*`
+- **Text Generation**: `anthropic.claude-*`, `meta.llama3-*`, `amazon.titan-text-*`
+- **Unified API**: Single `invoke_model()` method via bedrock-runtime
+
+This made it the perfect opportunity to establish a unified provider architecture.
+
+## Decision
+
+We refactored the provider infrastructure to use a **unified Provider ABC** with optional capabilities:
+
+### 1. Unified Provider Interface
+
+**New Structure:**
+```
+nextcloud_mcp_server/providers/
+├── __init__.py
+├── base.py              # Provider ABC with optional capabilities
+├── registry.py          # Auto-detection and factory
+├── ollama.py            # Supports both embedding + generation
+├── anthropic.py         # Generation only
+├── bedrock.py           # Supports both embedding + generation
+└── simple.py            # Embedding only (testing fallback)
+```
+
+**Base Class (`providers/base.py`):**
+```python
+class Provider(ABC):
+    @property
+    @abstractmethod
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        pass
+
+    @property
+    @abstractmethod
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        pass
+
+    @abstractmethod
+    async def embed(self, text: str) -> list[float]:
+        """Generate embedding (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Generate batch embeddings (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    def get_dimension(self) -> int:
+        """Get embedding dimension (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """Generate text (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def close(self) -> None:
+        """Close provider and release resources."""
+        pass
+```
+
+### 2. Provider Registry
+
+**Auto-Detection Priority** (`providers/registry.py`):
+```python
+class ProviderRegistry:
+    @staticmethod
+    def create_provider() -> Provider:
+        # 1. Bedrock (AWS_REGION or BEDROCK_*_MODEL)
+        # 2. Ollama (OLLAMA_BASE_URL)
+        # 3. Simple (fallback)
+```
+
+**Environment Variables:**
+
+**Bedrock:**
+- `AWS_REGION`: AWS region (e.g., "us-east-1")
+- `AWS_ACCESS_KEY_ID`: AWS access key (optional, uses credential chain)
+- `AWS_SECRET_ACCESS_KEY`: AWS secret key (optional)
+- `BEDROCK_EMBEDDING_MODEL`: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL`: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+
+**Ollama:**
+- `OLLAMA_BASE_URL`: Ollama API base URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL`: Model for embeddings (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL`: Model for text generation (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL`: Verify SSL certificates (default: "true")
+
+**Simple (no configuration, fallback):**
+- `SIMPLE_EMBEDDING_DIMENSION`: Embedding dimension (default: 384)
+
+### 3. Backward Compatibility
+
+**Old Code Continues to Work:**
+```python
+# Old way (still works)
+from nextcloud_mcp_server.embedding import get_embedding_service
+
+service = get_embedding_service()  # Returns singleton Provider
+embeddings = await service.embed_batch(texts)
+```
+
+**New Way (recommended):**
+```python
+# New way (cleaner)
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Returns singleton Provider
+embeddings = await provider.embed_batch(texts)
+
+# Can also use generation if provider supports it
+if provider.supports_generation:
+    text = await provider.generate("prompt")
+```
+
+**Migration Path:**
+- `embedding/service.py` now wraps `providers.get_provider()` for compatibility
+- `tests/rag_evaluation/llm_providers.py` now uses unified providers
+- Old imports still work, marked as deprecated in docstrings
+
+### 4. Amazon Bedrock Implementation
+
+**Features:**
+- Supports both embeddings and text generation
+- Model-specific request/response handling for:
+  - Titan Embed (amazon.titan-embed-text-*)
+  - Cohere Embed (cohere.embed-*)
+  - Claude (anthropic.claude-*)
+  - Llama (meta.llama3-*)
+  - Titan Text (amazon.titan-text-*)
+  - Mistral (mistral.*)
+- Uses boto3 bedrock-runtime client
+- Graceful degradation if boto3 not installed
+- Async implementation matching existing patterns
+
+**Model-Specific Handling:**
+```python
+# Bedrock embedding request (Titan)
+{"inputText": text}
+
+# Bedrock generation request (Claude)
+{
+    "anthropic_version": "bedrock-2023-05-31",
+    "max_tokens": max_tokens,
+    "temperature": 0.7,
+    "messages": [{"role": "user", "content": prompt}]
+}
+```
+
+## Consequences
+
+### Positive
+
+1. **Sustainable Provider Additions**
+   - New providers only need to implement `Provider` ABC
+   - Auto-detection via environment variables
+   - No modifications to existing code required
+
+2. **Code Consolidation**
+   - Single provider interface instead of two
+   - Unified configuration pattern
+   - Eliminated duplication
+
+3. **Better Extensibility**
+   - Providers can support one or both capabilities
+   - Clear capability detection via properties
+   - Registry pattern simplifies auto-detection
+
+4. **Improved Testing**
+   - RAG evaluation can use any provider (Ollama, Anthropic, Bedrock)
+   - Comprehensive unit tests for all providers
+   - Mocked boto3 tests for Bedrock
+
+5. **Production-Ready Bedrock Support**
+   - Full embedding and generation support
+   - Multiple model families supported
+   - AWS credential chain integration
+
+### Neutral
+
+1. **Optional Boto3 Dependency**
+   - boto3 is dev dependency only (not required for core functionality)
+   - Bedrock provider gracefully fails if boto3 not installed
+   - Users who want Bedrock must `pip install boto3`
+
+2. **Capability Properties**
+   - All providers must implement capability properties
+   - Methods raise `NotImplementedError` if capability not supported
+   - Clear error messages guide users to alternatives
+
+### Negative
+
+1. **Migration Effort**
+   - Existing code must be migrated to new imports (optional, backward compatible)
+   - Documentation needs updating
+   - Users must learn new environment variables
+
+2. **Increased Complexity**
+   - Provider base class has more methods (embedding + generation)
+   - More environment variables to configure
+   - Capability detection adds runtime checks
+
+## Implementation
+
+### Files Created
+
+**New Provider Infrastructure:**
+- `nextcloud_mcp_server/providers/__init__.py`
+- `nextcloud_mcp_server/providers/base.py`
+- `nextcloud_mcp_server/providers/registry.py`
+- `nextcloud_mcp_server/providers/ollama.py`
+- `nextcloud_mcp_server/providers/anthropic.py`
+- `nextcloud_mcp_server/providers/bedrock.py`
+- `nextcloud_mcp_server/providers/simple.py`
+
+**Tests:**
+- `tests/unit/providers/__init__.py`
+- `tests/unit/providers/test_bedrock.py` (9 unit tests)
+
+**Documentation:**
+- `docs/ADR-015-unified-provider-architecture.md` (this file)
+
+### Files Modified
+
+**Backward Compatibility:**
+- `nextcloud_mcp_server/embedding/service.py` - Now wraps `get_provider()`
+- `tests/rag_evaluation/llm_providers.py` - Uses unified providers
+
+**Dependencies:**
+- `pyproject.toml` - Added `boto3>=1.35.0` to dev dependencies
+
+### Testing Results
+
+**Unit Tests:** 127 passed (including 9 new Bedrock tests)
+**Type Checking:** All checks passed (ty)
+**Linting:** All checks passed (ruff)
+**Backward Compatibility:** Verified - existing embedding tests work
+
+## Alternatives Considered
+
+### Alternative 1: Keep Separate Provider Systems
+
+**Pros:**
+- No refactoring needed
+- Simpler short-term
+
+**Cons:**
+- Bedrock would need to be implemented twice
+- Continued code duplication
+- No long-term scalability
+
+**Decision:** Rejected - technical debt would continue to grow
+
+### Alternative 2: Separate Embedding and Generation Providers
+
+Use composition instead of unified interface:
+```python
+class CombinedProvider:
+    def __init__(self, embedding: EmbeddingProvider, generation: LLMProvider):
+        self.embedding = embedding
+        self.generation = generation
+```
+
+**Pros:**
+- Clearer separation of concerns
+- Simpler individual providers
+
+**Cons:**
+- Bedrock and Ollama naturally do both - artificial separation
+- More complex configuration (two providers to configure)
+- More boilerplate code
+
+**Decision:** Rejected - unified interface better matches provider capabilities
+
+### Alternative 3: Plugin System
+
+Dynamic provider registration via entry points:
+```python
+# setup.py
+entry_points={
+    'nextcloud_mcp.providers': [
+        'ollama = nextcloud_mcp_server.providers.ollama:OllamaProvider',
+        'bedrock = nextcloud_mcp_server.providers.bedrock:BedrockProvider',
+    ]
+}
+```
+
+**Pros:**
+- Most extensible
+- Third-party providers possible
+
+**Cons:**
+- Over-engineered for current needs
+- Added complexity
+- No immediate benefit
+
+**Decision:** Deferred - can add later if needed
+
+## Future Work
+
+1. **Additional Providers**
+   - OpenAI (embeddings + generation)
+   - Cohere (embeddings + generation)
+   - Google Vertex AI
+   - Azure OpenAI
+
+2. **Provider Features**
+   - Streaming generation support
+   - Batch API optimization (when available)
+   - Model-specific optimizations
+   - Cost tracking and metrics
+
+3. **Configuration Improvements**
+   - Provider profiles (development, production)
+   - Model aliasing (e.g., "small", "large")
+   - Fallback provider chains
+
+4. **Testing**
+   - Integration tests with real Bedrock endpoints
+   - Performance benchmarking across providers
+   - Cost comparison analysis
+
+## References
+
+- [boto3 Bedrock Runtime Documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Amazon Bedrock User Guide](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html)
+- ADR-003: Vector Database and Semantic Search
+- ADR-008: MCP Sampling for Semantic Search
+- ADR-013: RAG Evaluation Framework
@@ -0,0 +1,492 @@
+# ADR-016: Smithery Stateless Deployment for Multi-User Public Nextcloud Instances
+
+**Status:** Proposed
+**Date:** 2025-01-22
+**Deciders:** Development Team
+**Related:** ADR-004 (OAuth), ADR-007 (Background Vector Sync), ADR-015 (Unified Provider)
+
+## Context
+
+[Smithery](https://smithery.ai) is a hosting platform and marketplace for MCP servers that provides:
+
+- **Discovery**: Marketplace listing for MCP servers
+- **Hosting**: Containerized deployment with auto-scaling
+- **Authentication UI**: OAuth flow presentation for users
+- **Session Configuration**: Per-user settings passed via URL parameters
+- **Observability**: Usage logs and monitoring
+
+### Current Architecture Limitations
+
+The current nextcloud-mcp-server architecture assumes a **self-hosted deployment** with:
+
+1. **Persistent Infrastructure**
+   - Qdrant vector database for semantic search
+   - Background sync worker for content indexing
+   - Refresh token storage for offline access
+
+2. **Single-Tenant Configuration**
+   - Environment variables configure one Nextcloud instance
+   - `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`
+   - Or OAuth with a single IdP
+
+3. **Stateful Operations**
+   - Vector sync maintains index state across requests
+   - Token storage persists between sessions
+
+### Smithery Hosting Constraints
+
+Smithery-hosted containers are **stateless by design**:
+
+- No persistent storage between requests
+- No background workers or cron jobs
+- No databases (Qdrant, Redis, etc.)
+- Containers may be recycled at any time
+- Configuration passed per-session via URL parameters
+
+### Opportunity
+
+Many users have **publicly accessible Nextcloud instances** and want to:
+
+1. Try the MCP server without self-hosting infrastructure
+2. Connect multiple users to different Nextcloud instances
+3. Use basic Nextcloud tools without semantic search
+4. Benefit from Smithery's discovery and OAuth UI
+
+## Decision
+
+Implement a **stateless deployment mode** for Smithery that:
+
+1. **Disables stateful features** (vector sync, semantic search)
+2. **Creates clients per-session** from Smithery configuration
+3. **Supports multiple Nextcloud instances** via session config
+4. **Provides a useful subset of tools** that work without infrastructure
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                    Smithery-Hosted Stateless Mode                        │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                          │
+│  MCP Client                    Smithery                                  │
+│  (Cursor, Claude)              Infrastructure                            │
+│        │                            │                                    │
+│        │ 1. Connect                 │                                    │
+│        ├───────────────────────────►│                                    │
+│        │                            │                                    │
+│        │ 2. Config UI               │                                    │
+│        │◄───────────────────────────┤  User enters:                      │
+│        │    (Smithery presents)     │  - nextcloud_url                   │
+│        │                            │  - auth_mode (basic/oauth)         │
+│        │                            │  - credentials                     │
+│        │ 3. Tool call               │                                    │
+│        ├───────────────────────────►│                                    │
+│        │    + session config        │                                    │
+│        │                            │                                    │
+│        │                    ┌───────┴───────┐                            │
+│        │                    │  MCP Server   │                            │
+│        │                    │  Container    │                            │
+│        │                    │               │                            │
+│        │                    │ 4. Create     │                            │
+│        │                    │    client     │                            │
+│        │                    │    from       │                            │
+│        │                    │    config     │                            │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │                    │ 5. Call       │                            │
+│        │                    │    Nextcloud  │───────► User's Nextcloud   │
+│        │                    │    API        │         Instance           │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │ 6. Response        │ Return result │                            │
+│        │◄───────────────────┤               │                            │
+│        │                    └───────────────┘                            │
+│                                                                          │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+### Session Configuration Schema
+
+```python
+from pydantic import BaseModel, Field
+
+class SmitheryConfigSchema(BaseModel):
+    """Configuration schema for Smithery session."""
+
+    # Required: Nextcloud instance
+    nextcloud_url: str = Field(
+        ...,
+        description="Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+    )
+
+    # Authentication mode
+    auth_mode: str = Field(
+        "app_password",
+        description="Authentication method: 'app_password' or 'oauth'"
+    )
+
+    # App Password authentication (recommended for Smithery)
+    username: str | None = Field(
+        None,
+        description="Nextcloud username (required for app_password auth)"
+    )
+    app_password: str | None = Field(
+        None,
+        description="Nextcloud app password (Settings → Security → App passwords)"
+    )
+
+    # OAuth authentication (advanced)
+    # When auth_mode='oauth', Smithery handles the OAuth flow
+    # and passes the access token automatically
+```
+
+### Feature Matrix
+
+| Feature | Self-Hosted | Smithery Stateless |
+|---------|-------------|-------------------|
+| **Notes** | | |
+| List/Search notes | ✓ | ✓ |
+| Get/Create/Update notes | ✓ | ✓ |
+| Semantic search | ✓ | ✗ |
+| **Calendar** | | |
+| List calendars | ✓ | ✓ |
+| Get/Create events | ✓ | ✓ |
+| **Contacts** | | |
+| List address books | ✓ | ✓ |
+| Search/Get contacts | ✓ | ✓ |
+| **Files (WebDAV)** | | |
+| List/Download files | ✓ | ✓ |
+| Upload files | ✓ | ✓ |
+| Search files | ✓ | ✓ (keyword only) |
+| **Deck** | | |
+| List boards/cards | ✓ | ✓ |
+| Create/Update cards | ✓ | ✓ |
+| **Tables** | | |
+| List/Query tables | ✓ | ✓ |
+| Create/Update rows | ✓ | ✓ |
+| **Cookbook** | | |
+| List/Get recipes | ✓ | ✓ |
+| **Semantic Search** | | |
+| Vector search | ✓ | ✗ |
+| RAG answers | ✓ | ✗ |
+| **Background Sync** | | |
+| Auto-indexing | ✓ | ✗ |
+| Webhook sync | ✓ | ✗ |
+| **Admin UI (`/app`)** | | |
+| Vector sync status | ✓ | ✗ |
+| Vector visualization | ✓ | ✗ |
+| Webhook management | ✓ | ✗ |
+| Session management | ✓ | ✗ |
+
+### Implementation
+
+#### 1. Deployment Mode Detection
+
+```python
+# nextcloud_mcp_server/config.py
+
+class DeploymentMode(Enum):
+    SELF_HOSTED = "self_hosted"      # Full features, env-based config
+    SMITHERY_STATELESS = "smithery"  # Stateless, session-based config
+
+def get_deployment_mode() -> DeploymentMode:
+    """Detect deployment mode from environment."""
+    if os.getenv("SMITHERY_DEPLOYMENT") == "true":
+        return DeploymentMode.SMITHERY_STATELESS
+    return DeploymentMode.SELF_HOSTED
+```
+
+#### 2. Session-Based Client Factory
+
+```python
+# nextcloud_mcp_server/context.py
+
+async def get_client(ctx: Context) -> NextcloudClient:
+    """Get NextcloudClient - from session config or environment."""
+
+    mode = get_deployment_mode()
+
+    if mode == DeploymentMode.SMITHERY_STATELESS:
+        # Create client from Smithery session config
+        config = ctx.session_config
+        if not config:
+            raise McpError("Session configuration required")
+
+        return NextcloudClient(
+            base_url=config.nextcloud_url,
+            username=config.username,
+            password=config.app_password,
+        )
+    else:
+        # Existing behavior: from environment or OAuth context
+        return await _get_client_from_context(ctx)
+```
+
+#### 3. Conditional Tool Registration
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_mcp_server(mode: DeploymentMode) -> FastMCP:
+    """Create MCP server with mode-appropriate tools."""
+
+    mcp = FastMCP("Nextcloud MCP")
+
+    # Always register core tools
+    configure_notes_tools(mcp)
+    configure_calendar_tools(mcp)
+    configure_contacts_tools(mcp)
+    configure_webdav_tools(mcp)
+    configure_deck_tools(mcp)
+    configure_tables_tools(mcp)
+    configure_cookbook_tools(mcp)
+
+    # Only register stateful tools in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        configure_semantic_tools(mcp)  # Requires Qdrant
+        register_oauth_tools(mcp)       # Requires token storage
+
+    return mcp
+```
+
+#### 4. Exclude Admin UI Routes
+
+The `/app` admin UI should **not be installed** in Smithery mode because:
+
+- **Vector sync status** - No vector sync in stateless mode
+- **Vector visualization** - No Qdrant to visualize
+- **Webhook management** - No webhook sync without background workers
+- **Session management** - No persistent sessions to manage
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_app(mode: DeploymentMode) -> Starlette:
+    """Create Starlette app with mode-appropriate routes."""
+
+    routes = [
+        Route("/health/live", health_live, methods=["GET"]),
+        Route("/health/ready", health_ready, methods=["GET"]),
+    ]
+
+    # Only mount admin UI in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        browser_app = create_browser_app()
+        routes.append(
+            Route("/app", lambda r: RedirectResponse("/app/", status_code=307))
+        )
+        routes.append(Mount("/app", app=browser_app))
+        logger.info("Admin UI mounted at /app")
+    else:
+        logger.info("Admin UI disabled in Smithery stateless mode")
+
+    # Mount FastMCP at root
+    mcp_app = create_mcp_server(mode).streamable_http_app()
+    routes.append(Mount("/", app=mcp_app))
+
+    return Starlette(routes=routes, lifespan=starlette_lifespan)
+```
+
+**Endpoints by Mode:**
+
+| Endpoint | Self-Hosted | Smithery |
+|----------|-------------|----------|
+| `/mcp` | ✓ | ✓ |
+| `/health/live` | ✓ | ✓ |
+| `/health/ready` | ✓ | ✓ |
+| `/.well-known/mcp-config` | ✓ | ✓ |
+| `/app` | ✓ | ✗ |
+| `/app/vector-sync/status` | ✓ | ✗ |
+| `/app/vector-viz` | ✓ | ✗ |
+| `/app/webhooks` | ✓ | ✗ |
+
+#### 5. Smithery Integration Files
+
+**smithery.yaml:**
+```yaml
+runtime: "container"
+build:
+  dockerfile: "Dockerfile.smithery"
+  dockerBuildPath: "."
+startCommand:
+  type: "http"
+  configSchema:
+    type: "object"
+    required: ["nextcloud_url", "username", "app_password"]
+    properties:
+      nextcloud_url:
+        type: "string"
+        title: "Nextcloud URL"
+        description: "Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+      username:
+        type: "string"
+        title: "Username"
+        description: "Your Nextcloud username"
+      app_password:
+        type: "string"
+        title: "App Password"
+        description: "Generate at Settings → Security → App passwords"
+  exampleConfig:
+    nextcloud_url: "https://cloud.example.com"
+    username: "alice"
+    app_password: "xxxxx-xxxxx-xxxxx-xxxxx-xxxxx"
+```
+
+**Dockerfile.smithery:**
+```dockerfile
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
+
+# Copy project files
+COPY pyproject.toml uv.lock ./
+COPY nextcloud_mcp_server ./nextcloud_mcp_server
+
+# Install dependencies (without vector/semantic extras)
+RUN uv sync --frozen --no-dev
+
+# Set Smithery mode
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081
+EXPOSE 8081
+
+CMD ["uv", "run", "python", "-m", "nextcloud_mcp_server.smithery_main"]
+```
+
+**nextcloud_mcp_server/smithery_main.py:**
+```python
+"""Smithery-specific entrypoint for stateless deployment."""
+
+import os
+import uvicorn
+from starlette.middleware.cors import CORSMiddleware
+
+from nextcloud_mcp_server.app import create_mcp_server
+from nextcloud_mcp_server.config import DeploymentMode
+
+def main():
+    # Force stateless mode
+    os.environ["SMITHERY_DEPLOYMENT"] = "true"
+    os.environ["VECTOR_SYNC_ENABLED"] = "false"
+
+    mcp = create_mcp_server(DeploymentMode.SMITHERY_STATELESS)
+    app = mcp.streamable_http_app()
+
+    # Add CORS for browser-based clients
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["GET", "POST", "OPTIONS"],
+        allow_headers=["*"],
+        expose_headers=["mcp-session-id", "mcp-protocol-version"],
+    )
+
+    # Smithery sets PORT environment variable
+    port = int(os.environ.get("PORT", 8081))
+    uvicorn.run(app, host="0.0.0.0", port=port)
+
+if __name__ == "__main__":
+    main()
+```
+
+### Security Considerations
+
+1. **App Passwords over User Passwords**
+   - Smithery config encourages app passwords (revocable, scoped)
+   - Documentation guides users to create dedicated app passwords
+   - App passwords can be revoked without changing main password
+
+2. **HTTPS Required**
+   - `nextcloud_url` must be HTTPS for production use
+   - Validation rejects HTTP URLs in Smithery mode
+
+3. **No Credential Storage**
+   - Credentials exist only for request duration
+   - No server-side persistence of user credentials
+   - Smithery handles secure config transmission
+
+4. **Scope Limitation**
+   - Stateless mode cannot access offline_access
+   - No background operations on user's behalf
+   - Clear user expectation: tools work during session only
+
+### Migration Path
+
+Users can start with Smithery stateless mode and migrate to self-hosted:
+
+1. **Try on Smithery** → Basic tools, no setup
+2. **Self-host for semantic search** → Add Qdrant, enable vector sync
+3. **Full deployment** → Background sync, webhooks, multi-user OAuth
+
+## Consequences
+
+### Positive
+
+1. **Lower barrier to entry** - Users can try without infrastructure
+2. **Multi-user support** - Each session connects to different Nextcloud
+3. **Smithery ecosystem** - Discovery, observability, OAuth UI
+4. **Clear feature tiers** - Stateless (simple) vs self-hosted (full)
+
+### Negative
+
+1. **No semantic search** - Key differentiator unavailable on Smithery
+2. **Per-request auth** - Credentials sent with each request
+3. **No offline access** - Cannot perform background operations
+4. **Maintenance burden** - Two deployment modes to support
+
+### Neutral
+
+1. **Feature subset** - May encourage users to self-host for full features
+2. **Documentation needs** - Clear guidance on mode differences required
+
+## Alternatives Considered
+
+### 1. External MCP Only
+
+**Approach:** Only support self-hosted external MCP registration on Smithery.
+
+**Rejected because:**
+- Higher barrier to entry for new users
+- Misses opportunity for Smithery marketplace visibility
+- Users want to try before committing to infrastructure
+
+### 2. Embedded Vector DB (SQLite-vec)
+
+**Approach:** Use SQLite with vector extensions for per-request indexing.
+
+**Rejected because:**
+- No persistence between requests anyway
+- Indexing latency too high for synchronous requests
+- Complexity without benefit in stateless context
+
+### 3. External Vector DB Service
+
+**Approach:** Connect to Pinecone/Weaviate Cloud from Smithery container.
+
+**Rejected because:**
+- Adds external dependency and cost
+- Per-user collections require complex multi-tenancy
+- Sync still impossible without background workers
+
+### 4. Hybrid: Smithery + User's Qdrant
+
+**Approach:** User provides their own Qdrant URL in session config.
+
+**Considered for future:**
+- Could enable semantic search for advanced users
+- Adds complexity to session config
+- Sync still requires external trigger (manual or webhook)
+
+## References
+
+- [Smithery Documentation](https://smithery.ai/docs)
+- [Smithery Session Configuration](https://smithery.ai/docs/build/session-config)
+- [Smithery External MCPs](https://smithery.ai/docs/build/external)
+- [MCP Streamable HTTP Transport](https://modelcontextprotocol.io/docs/concepts/transports)
+- [Nextcloud App Passwords](https://docs.nextcloud.com/server/latest/user_manual/en/session_management.html#app-passwords)
@@ -0,0 +1,338 @@
+# Amazon Bedrock Setup Guide
+
+This guide covers how to configure the Nextcloud MCP Server to use Amazon Bedrock for embeddings and text generation.
+
+## Prerequisites
+
+1. **AWS Account** with access to Amazon Bedrock
+2. **boto3 library** installed: `pip install boto3` or `uv sync --group dev`
+3. **Model Access** - Request access to models in AWS Bedrock console
+
+## Required AWS Permissions
+
+### IAM Policy for Bedrock Access
+
+The AWS IAM user or role needs the following permissions:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockInvokeModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel",
+        "bedrock:InvokeModelWithResponseStream"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+### Minimal Permissions (Production)
+
+For production deployments, restrict to specific models:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockEmbeddings",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v2:0"
+      ]
+    },
+    {
+      "Sid": "BedrockGeneration",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0"
+      ]
+    }
+  ]
+}
+```
+
+### Additional Permissions (Optional)
+
+For advanced use cases:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockListModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:ListFoundationModels",
+        "bedrock:GetFoundationModel"
+      ],
+      "Resource": "*"
+    },
+    {
+      "Sid": "BedrockAsyncInvoke",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModelAsync",
+        "bedrock:GetAsyncInvoke",
+        "bedrock:ListAsyncInvokes"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+## Model Access
+
+Before using Bedrock models, you must request access in the AWS Console:
+
+1. Navigate to **Amazon Bedrock** → **Model access**
+2. Click **Manage model access**
+3. Select models you want to use:
+   - **Embeddings:** Amazon Titan Embed Text, Cohere Embed
+   - **Text Generation:** Anthropic Claude, Meta Llama, Amazon Titan Text
+4. Click **Request model access**
+5. Wait for approval (usually instant for most models)
+
+## Supported Models
+
+### Embedding Models
+
+| Provider | Model ID | Dimensions | Best For |
+|----------|----------|------------|----------|
+| Amazon Titan | `amazon.titan-embed-text-v1` | 1,536 | General purpose |
+| Amazon Titan | `amazon.titan-embed-text-v2:0` | 1,024 | Latest, improved quality |
+| Cohere | `cohere.embed-english-v3` | 1,024 | English text |
+| Cohere | `cohere.embed-multilingual-v3` | 1,024 | Multilingual |
+
+### Text Generation Models
+
+| Provider | Model ID | Context | Best For |
+|----------|----------|---------|----------|
+| Anthropic | `anthropic.claude-3-sonnet-20240229-v1:0` | 200K | Balanced performance |
+| Anthropic | `anthropic.claude-3-haiku-20240307-v1:0` | 200K | Fast, cost-effective |
+| Anthropic | `anthropic.claude-3-opus-20240229-v1:0` | 200K | Highest quality |
+| Meta | `meta.llama3-8b-instruct-v1:0` | 8K | Fast, open-source |
+| Meta | `meta.llama3-70b-instruct-v1:0` | 8K | High quality |
+| Amazon | `amazon.titan-text-express-v1` | 8K | Fast, low cost |
+| Mistral | `mistral.mistral-7b-instruct-v0:2` | 32K | Efficient |
+
+## Configuration
+
+### Environment Variables
+
+**Required:**
+```bash
+AWS_REGION=us-east-1
+```
+
+**Optional (at least one model required):**
+```bash
+# For embeddings
+BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+
+# For text generation (RAG evaluation)
+BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+```
+
+**AWS Credentials (choose one method):**
+
+**Method 1: Environment Variables**
+```bash
+AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
+AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 2: AWS Credentials File** (`~/.aws/credentials`)
+```ini
+[default]
+aws_access_key_id = AKIAIOSFODNN7EXAMPLE
+aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 3: IAM Role** (when running on AWS EC2/ECS/Lambda)
+- No credentials needed, uses instance/task role automatically
+
+### Docker Configuration
+
+Add to your `docker-compose.yml`:
+
+```yaml
+services:
+  mcp:
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+      - BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+```
+
+Or use AWS credentials file volume mount:
+
+```yaml
+services:
+  mcp:
+    volumes:
+      - ~/.aws:/root/.aws:ro
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+```
+
+## Usage Examples
+
+### Embeddings Only
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export AWS_ACCESS_KEY_ID=your-key
+export AWS_SECRET_ACCESS_KEY=your-secret
+
+uv run nextcloud-mcp-server
+```
+
+### Both Embeddings and Generation
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+# For RAG evaluation with Bedrock
+export RAG_EVAL_PROVIDER=bedrock
+export RAG_EVAL_BEDROCK_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+uv run python -m tests.rag_evaluation.evaluate
+```
+
+### Programmatic Usage
+
+```python
+from nextcloud_mcp_server.providers import BedrockProvider
+
+# Embeddings only
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+)
+
+embeddings = await provider.embed_batch(["text1", "text2"])
+
+# Both capabilities
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+    generation_model="anthropic.claude-3-sonnet-20240229-v1:0",
+)
+
+# Generate embeddings
+embedding = await provider.embed("query text")
+
+# Generate text
+response = await provider.generate("Write a summary", max_tokens=500)
+```
+
+## Cost Considerations
+
+### Embedding Costs (as of Jan 2025)
+
+| Model | Price per 1K tokens |
+|-------|---------------------|
+| Titan Embed Text v2 | $0.0001 |
+| Cohere Embed English v3 | $0.0001 |
+
+### Generation Costs (as of Jan 2025)
+
+| Model | Input (per 1K tokens) | Output (per 1K tokens) |
+|-------|----------------------|------------------------|
+| Claude 3 Haiku | $0.00025 | $0.00125 |
+| Claude 3 Sonnet | $0.003 | $0.015 |
+| Claude 3 Opus | $0.015 | $0.075 |
+| Llama 3 8B | $0.0003 | $0.0006 |
+| Titan Text Express | $0.0002 | $0.0006 |
+
+**Note:** Prices vary by region. Check [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/) for current rates.
+
+## Troubleshooting
+
+### Error: "Executable doesn't exist" or boto3 not found
+
+**Solution:**
+```bash
+uv sync --group dev  # Installs boto3
+```
+
+### Error: "AccessDeniedException"
+
+**Causes:**
+1. IAM permissions missing
+2. Model access not requested
+3. Wrong AWS region
+
+**Solution:**
+1. Verify IAM policy includes `bedrock:InvokeModel`
+2. Request model access in Bedrock console
+3. Check model is available in your region
+
+### Error: "ResourceNotFoundException"
+
+**Cause:** Invalid model ID or model not available in region
+
+**Solution:**
+- Verify model ID matches exactly (case-sensitive)
+- Check model availability in your AWS region
+- Use `aws bedrock list-foundation-models` to see available models
+
+### Error: "ThrottlingException"
+
+**Cause:** Rate limit exceeded
+
+**Solution:**
+- Reduce request rate
+- Request quota increase via AWS Support
+- Use batch operations where possible
+
+## Security Best Practices
+
+1. **Use IAM Roles** when running on AWS infrastructure
+2. **Rotate Access Keys** regularly if using IAM users
+3. **Restrict Permissions** to only required models
+4. **Enable CloudTrail** for audit logging
+5. **Use AWS Secrets Manager** for credential management
+6. **Monitor Costs** with AWS Cost Explorer and Budgets
+
+## Regional Availability
+
+Amazon Bedrock is available in:
+- **US East (N. Virginia)**: `us-east-1` ✅ Most models
+- **US West (Oregon)**: `us-west-2` ✅ Most models
+- **Asia Pacific (Singapore)**: `ap-southeast-1`
+- **Asia Pacific (Tokyo)**: `ap-northeast-1`
+- **Europe (Frankfurt)**: `eu-central-1`
+
+**Note:** Model availability varies by region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) for current availability.
+
+## References
+
+- [AWS Bedrock Documentation](https://docs.aws.amazon.com/bedrock/)
+- [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/)
+- [boto3 Bedrock Runtime API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Provider Architecture ADR](./ADR-015-unified-provider-architecture.md)
@@ -108,6 +108,317 @@ NEXTCLOUD_PASSWORD=your_app_password_or_password

 ---

+## Semantic Search Configuration (Optional)
+
+The MCP server includes semantic search capabilities powered by vector embeddings. This feature requires a vector database (Qdrant) and an embedding service.
+
+### Qdrant Vector Database Modes
+
+The server supports three Qdrant deployment modes:
+
+1. **In-Memory Mode** (Default) - Simplest for development and testing
+2. **Persistent Local Mode** - For single-instance deployments with persistence
+3. **Network Mode** - For production with dedicated Qdrant service
+
+#### 1. In-Memory Mode (Default)
+
+No configuration needed! If neither `QDRANT_URL` nor `QDRANT_LOCATION` is set, the server defaults to in-memory mode:
+
+```dotenv
+# No Qdrant configuration needed - defaults to :memory:
+VECTOR_SYNC_ENABLED=true
+```
+
+**Pros:**
+- Zero configuration
+- Fast startup
+- Perfect for testing
+
+**Cons:**
+- Data lost on restart
+- Limited to available RAM
+
+#### 2. Persistent Local Mode
+
+For single-instance deployments that need persistence without a separate Qdrant service:
+
+```dotenv
+# Local persistent storage
+QDRANT_LOCATION=/app/data/qdrant  # Or any writable path
+VECTOR_SYNC_ENABLED=true
+```
+
+**Pros:**
+- Data persists across restarts
+- No separate service needed
+- Suitable for small/medium deployments
+
+**Cons:**
+- Limited to single instance
+- Shares resources with MCP server
+
+#### 3. Network Mode
+
+For production deployments with a dedicated Qdrant service:
+
+```dotenv
+# Network mode configuration
+QDRANT_URL=http://qdrant:6333
+QDRANT_API_KEY=your-secret-api-key  # Optional
+QDRANT_COLLECTION=nextcloud_content  # Optional
+VECTOR_SYNC_ENABLED=true
+```
+
+**Pros:**
+- Scalable and performant
+- Can be shared across multiple MCP instances
+- Supports clustering and replication
+
+**Cons:**
+- Requires separate Qdrant service
+- More complex deployment
+
+### Qdrant Collection Naming
+
+Collection names are automatically generated to include the embedding model, ensuring safe model switching and preventing dimension mismatches.
+
+#### Auto-Generated Naming (Default)
+
+**Format:** `{deployment-id}-{model-name}`
+
+**Components:**
+- **Deployment ID:** `OTEL_SERVICE_NAME` (if configured) or `hostname` (fallback)
+- **Model name:** `OLLAMA_EMBEDDING_MODEL`
+
+**Examples:**
+
+```bash
+# With OTEL service name configured
+OTEL_SERVICE_NAME=my-mcp-server
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+# → Collection: "my-mcp-server-nomic-embed-text"
+
+# Simple Docker deployment (OTEL not configured)
+# hostname=mcp-container
+OLLAMA_EMBEDDING_MODEL=all-minilm
+# → Collection: "mcp-container-all-minilm"
+```
+
+#### Switching Embedding Models
+
+When you change `OLLAMA_EMBEDDING_MODEL`, a new collection is automatically created:
+
+```bash
+# Initial setup
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+# Collection: "my-server-nomic-embed-text" (768 dimensions)
+
+# Change model
+OLLAMA_EMBEDDING_MODEL=all-minilm
+# Collection: "my-server-all-minilm" (384 dimensions)
+# → New collection created, full re-embedding occurs
+```
+
+**Important:**
+- **Collections are mutually exclusive** - vectors cannot be shared between different embedding models
+- **Switching models requires re-embedding** all documents (may take time for large note collections)
+- **Old collection remains** in Qdrant and can be deleted manually if no longer needed
+
+#### Explicit Override
+
+Set `QDRANT_COLLECTION` to use a specific collection name:
+
+```bash
+QDRANT_COLLECTION=my-custom-collection  # Bypasses auto-generation
+```
+
+**Use cases:**
+- Backward compatibility with existing deployments
+- Custom naming schemes
+- Sharing a collection across deployments (advanced)
+
+#### Multi-Server Deployments
+
+Each server should have a unique deployment ID to avoid collection collisions:
+
+```bash
+# Server 1 (Production)
+OTEL_SERVICE_NAME=mcp-prod
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+# → Collection: "mcp-prod-nomic-embed-text"
+
+# Server 2 (Staging)
+OTEL_SERVICE_NAME=mcp-staging
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+# → Collection: "mcp-staging-nomic-embed-text"
+
+# Server 3 (Different model)
+OTEL_SERVICE_NAME=mcp-experimental
+OLLAMA_EMBEDDING_MODEL=bge-large
+# → Collection: "mcp-experimental-bge-large"
+```
+
+**Benefits:**
+- Multiple MCP servers can share one Qdrant instance safely
+- No naming collisions between deployments
+- Clear collection ownership (can see which deployment and model)
+
+#### Dimension Validation
+
+The server validates collection dimensions on startup:
+
+```
+Dimension mismatch for collection 'my-server-nomic-embed-text':
+  Expected: 384 (from embedding model 'all-minilm')
+  Found: 768
+This usually means you changed the embedding model.
+Solutions:
+  1. Delete the old collection: Collection will be recreated with new dimensions
+  2. Set QDRANT_COLLECTION to use a different collection name
+  3. Revert OLLAMA_EMBEDDING_MODEL to the original model
+```
+
+**What this prevents:**
+- Runtime errors from dimension mismatches
+- Data corruption in Qdrant
+- Confusing error messages during indexing
+
+### Vector Sync Configuration
+
+Control background indexing behavior:
+
+```dotenv
+# Vector sync settings (ADR-007)
+VECTOR_SYNC_ENABLED=true              # Enable background indexing
+VECTOR_SYNC_SCAN_INTERVAL=300         # Scan interval in seconds (default: 5 minutes)
+VECTOR_SYNC_PROCESSOR_WORKERS=3       # Concurrent indexing workers (default: 3)
+VECTOR_SYNC_QUEUE_MAX_SIZE=10000      # Max queued documents (default: 10000)
+
+# Document chunking settings (for vector embeddings)
+DOCUMENT_CHUNK_SIZE=512               # Words per chunk (default: 512)
+DOCUMENT_CHUNK_OVERLAP=50             # Overlapping words between chunks (default: 50)
+```
+
+### Embedding Service Configuration
+
+The server uses an embedding service to generate vector representations. Two options are available:
+
+#### Ollama (Recommended)
+
+Use a local Ollama instance for embeddings:
+
+```dotenv
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # Default model
+OLLAMA_VERIFY_SSL=true                   # Verify SSL certificates
+```
+
+#### Simple Embedding Provider (Fallback)
+
+If `OLLAMA_BASE_URL` is not set, the server uses a simple random embedding provider for testing. This is **not suitable for production** as it generates random embeddings with no semantic meaning.
+
+### Document Chunking Configuration
+
+The server chunks documents before embedding to handle documents larger than the embedding model's context window. Chunk size and overlap can be tuned based on your embedding model and content type.
+
+#### Choosing Chunk Size
+
+**Smaller chunks (256-384 words)**:
+- More precise matching
+- Less context per chunk
+- Better for finding specific information
+- Higher storage requirements (more vectors)
+
+**Larger chunks (768-1024 words)**:
+- More context per chunk
+- Less precise matching
+- Better for understanding broader topics
+- Lower storage requirements (fewer vectors)
+
+**Default (512 words)**:
+- Balanced approach suitable for most use cases
+- Works well with typical note lengths
+- Good compromise between precision and context
+
+#### Choosing Overlap
+
+Overlap preserves context across chunk boundaries. Recommended settings:
+
+- **10-20% of chunk size** (e.g., 50-100 words for 512-word chunks)
+- **Too small** (<10%): May lose context at boundaries
+- **Too large** (>20%): Redundant storage, diminishing returns
+
+**Examples**:
+```dotenv
+# Precise matching for short notes
+DOCUMENT_CHUNK_SIZE=256
+DOCUMENT_CHUNK_OVERLAP=25
+
+# Default balanced configuration
+DOCUMENT_CHUNK_SIZE=512
+DOCUMENT_CHUNK_OVERLAP=50
+
+# More context for long documents
+DOCUMENT_CHUNK_SIZE=1024
+DOCUMENT_CHUNK_OVERLAP=100
+```
+
+**Important**: Changing chunk size requires re-embedding all documents. The collection naming strategy (see "Qdrant Collection Naming" above) helps manage this by creating separate collections for different configurations.
+
+### Environment Variables Reference
+
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `QDRANT_URL` | ⚠️ Optional | - | Qdrant service URL (network mode) - mutually exclusive with `QDRANT_LOCATION` |
+| `QDRANT_LOCATION` | ⚠️ Optional | `:memory:` | Local Qdrant path (`:memory:` or `/path/to/data`) - mutually exclusive with `QDRANT_URL` |
+| `QDRANT_API_KEY` | ⚠️ Optional | - | Qdrant API key (network mode only) |
+| `QDRANT_COLLECTION` | ⚠️ Optional | `nextcloud_content` | Qdrant collection name |
+| `VECTOR_SYNC_ENABLED` | ⚠️ Optional | `false` | Enable background vector indexing |
+| `VECTOR_SYNC_SCAN_INTERVAL` | ⚠️ Optional | `300` | Document scan interval (seconds) |
+| `VECTOR_SYNC_PROCESSOR_WORKERS` | ⚠️ Optional | `3` | Concurrent indexing workers |
+| `VECTOR_SYNC_QUEUE_MAX_SIZE` | ⚠️ Optional | `10000` | Max queued documents |
+| `OLLAMA_BASE_URL` | ⚠️ Optional | - | Ollama API endpoint for embeddings |
+| `OLLAMA_EMBEDDING_MODEL` | ⚠️ Optional | `nomic-embed-text` | Embedding model to use |
+| `OLLAMA_VERIFY_SSL` | ⚠️ Optional | `true` | Verify SSL certificates |
+| `DOCUMENT_CHUNK_SIZE` | ⚠️ Optional | `512` | Words per chunk for document embedding |
+| `DOCUMENT_CHUNK_OVERLAP` | ⚠️ Optional | `50` | Overlapping words between chunks (must be < chunk size) |
+
+### Docker Compose Example
+
+Enable network mode Qdrant with docker-compose:
+
+```yaml
+services:
+  mcp:
+    environment:
+      - QDRANT_URL=http://qdrant:6333
+      - VECTOR_SYNC_ENABLED=true
+
+  qdrant:
+    image: qdrant/qdrant:latest
+    ports:
+      - 127.0.0.1:6333:6333
+    volumes:
+      - qdrant-data:/qdrant/storage
+    profiles:
+      - qdrant  # Optional service
+
+volumes:
+  qdrant-data:
+```
+
+Start with Qdrant service:
+```bash
+docker-compose --profile qdrant up
+```
+
+Or use default in-memory mode (no `--profile` needed):
+```bash
+docker-compose up
+```
+
+---
+
 ## Loading Environment Variables

 After creating your `.env` file, load the environment variables:
@@ -8,7 +8,9 @@
 | `nc_notes_update_note` | Update an existing note by ID |
 | `nc_notes_append_content` | Append content to an existing note with a clear separator |
 | `nc_notes_delete_note` | Delete a note by ID |
-| `nc_notes_search_notes` | Search notes by title or content |
+| `nc_notes_search_notes` | Search notes by title or content (keyword search) |
+| `nc_notes_semantic_search` | Search notes by meaning using vector embeddings (requires vector sync) |
+| `nc_notes_semantic_search_answer` | Search notes semantically and generate a natural language answer via MCP sampling (requires vector sync and sampling-capable MCP client) |

 ### Note Attachments

@@ -634,6 +634,12 @@ The server supports the following OAuth scopes, organized by Nextcloud app:
 - `sharing:read` - List shares and read share information
 - `sharing:write` - Create, update, and delete shares

+#### Semantic Search (Multi-App Vector Database)
+- `semantic:read` - Query vector database, perform semantic search across all indexed Nextcloud apps (notes, calendar, deck, files, contacts)
+- `semantic:write` - Enable/disable background vector synchronization, manage indexing settings
+
+> **Note**: Semantic search scopes provide access to the vector database that indexes content across **all** Nextcloud apps. Unlike app-specific scopes (e.g., `notes:read`), semantic scopes grant cross-app search capabilities powered by background vector synchronization (ADR-007).
+
 ### Scope Discovery

 The MCP server provides scope discovery through two mechanisms:
@@ -0,0 +1,258 @@
+# Observability and Monitoring
+
+The Nextcloud MCP Server includes comprehensive observability features for production deployments:
+
+- **Prometheus metrics** for monitoring performance and health
+- **OpenTelemetry distributed tracing** for debugging request flows
+- **Structured JSON logging** with trace correlation
+- **Kubernetes integration** via ServiceMonitor and PrometheusRule
+
+## Quick Start
+
+### Local Development with Prometheus
+
+```bash
+# Enable metrics (enabled by default)
+export METRICS_ENABLED=true
+export METRICS_PORT=9090
+
+# Enable tracing (optional - tracing is enabled when OTEL_EXPORTER_OTLP_ENDPOINT is set)
+export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
+
+# Start the server
+docker-compose up -d mcp
+```
+
+Access metrics at: `http://localhost:9090/metrics`
+
+### Kubernetes Deployment
+
+Metrics are automatically scraped if you have Prometheus Operator installed:
+
+```bash
+helm install nextcloud-mcp charts/nextcloud-mcp-server \
+  --set observability.metrics.enabled=true \
+  --set observability.tracing.enabled=true \
+  --set observability.tracing.endpoint=http://opentelemetry-collector:4317 \
+  --set serviceMonitor.enabled=true
+```
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `METRICS_ENABLED` | `true` | Enable Prometheus metrics |
+| `METRICS_PORT` | `9090` | Port for metrics endpoint |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | - | OTLP gRPC endpoint (e.g., `http://otel-collector:4317`). Tracing is enabled when this is set. |
+| `OTEL_SERVICE_NAME` | `nextcloud-mcp-server` | Service name in traces |
+| `OTEL_TRACES_SAMPLER` | `always_on` | Trace sampling strategy |
+| `OTEL_TRACES_SAMPLER_ARG` | `1.0` | Sampling rate (0.0-1.0) |
+| `LOG_FORMAT` | `json` | Log format (`json` or `text`) |
+| `LOG_LEVEL` | `INFO` | Minimum log level |
+| `LOG_INCLUDE_TRACE_CONTEXT` | `true` | Include trace IDs in logs |
+
+### Helm Chart Configuration
+
+```yaml
+observability:
+  metrics:
+    enabled: true
+    port: 9090
+    path: /metrics
+
+  tracing:
+    enabled: true
+    endpoint: "http://opentelemetry-collector:4317"
+    samplingRate: 1.0
+
+  logging:
+    format: json
+    level: INFO
+    includeTraceContext: true
+
+serviceMonitor:
+  enabled: true
+  interval: 30s
+  scrapeTimeout: 10s
+```
+
+## Metrics
+
+### HTTP Server Metrics (RED)
+
+- `mcp_http_requests_total` - Total HTTP requests
+- `mcp_http_request_duration_seconds` - Request latency histogram
+- `mcp_http_requests_in_progress` - In-flight requests gauge
+
+### MCP Tool Metrics
+
+- `mcp_tool_calls_total` - Tool invocation count by status
+- `mcp_tool_duration_seconds` - Tool execution latency
+- `mcp_tool_errors_total` - Tool errors by type
+
+### Nextcloud API Metrics
+
+- `mcp_nextcloud_api_requests_total` - API calls by app and status
+- `mcp_nextcloud_api_duration_seconds` - API latency by app
+- `mcp_nextcloud_api_retries_total` - Retry count (429, timeout, etc.)
+
+### OAuth Flow Metrics
+
+- `mcp_oauth_token_validations_total` - Token validation count
+- `mcp_oauth_token_exchange_total` - Token exchange operations
+- `mcp_oauth_token_cache_hits_total` - Cache hit/miss rate
+- `mcp_oauth_refresh_token_operations_total` - Refresh token storage ops
+
+### Vector Sync Metrics (when enabled)
+
+- `mcp_vector_sync_documents_scanned_total` - Documents discovered
+- `mcp_vector_sync_documents_processed_total` - Processing results
+- `mcp_vector_sync_processing_duration_seconds` - Processing latency
+- `mcp_vector_sync_queue_size` - Current queue depth
+- `mcp_qdrant_operations_total` - Qdrant DB operations
+
+### Database Metrics
+
+- `mcp_db_operations_total` - DB operations (SQLite, Qdrant)
+- `mcp_db_operation_duration_seconds` - DB latency
+
+### Dependency Health
+
+- `mcp_dependency_health` - External dependency status (1=up, 0=down)
+- `mcp_dependency_check_duration_seconds` - Health check latency
+
+## Distributed Tracing
+
+### Span Hierarchy
+
+```
+HTTP POST /messages
+├── mcp.tool.nc_notes_create_note
+│   └── nextcloud.api.notes.POST
+│       └── httpx request (auto-instrumented)
+└── oauth.token.validate (if OAuth mode)
+    └── httpx request to IdP
+```
+
+### Span Attributes
+
+- **MCP tools**: `mcp.tool.name`, `mcp.tool.args` (sanitized)
+- **Nextcloud API**: `nextcloud.app`, `http.method`, `http.status_code`
+- **OAuth**: `oauth.operation`, `oauth.method`
+- **Vector sync**: `vector_sync.operation`, `vector_sync.document_count`
+
+### Trace Context in Logs
+
+When tracing is enabled, all logs include `trace_id` and `span_id`:
+
+```json
+{
+  "timestamp": "2025-01-09T12:34:56.789Z",
+  "level": "INFO",
+  "logger": "nextcloud_mcp_server.server.notes",
+  "message": "Note created successfully",
+  "trace_id": "a1b2c3d4e5f6...",
+  "span_id": "123456789abc...",
+  "note_id": 42
+}
+```
+
+## Dashboards
+
+### Prometheus Queries
+
+**Request Rate (req/s)**:
+```promql
+sum(rate(mcp_http_requests_total[5m])) by (method, endpoint)
+```
+
+**Error Rate (%)**:
+```promql
+sum(rate(mcp_http_requests_total{status_code=~"5.."}[5m]))
+  / sum(rate(mcp_http_requests_total[5m])) * 100
+```
+
+**P95 Latency**:
+```promql
+histogram_quantile(0.95,
+  sum(rate(mcp_http_request_duration_seconds_bucket[5m])) by (le, endpoint)
+)
+```
+
+**Top Tools by Volume**:
+```promql
+topk(10, sum(rate(mcp_tool_calls_total[5m])) by (tool_name))
+```
+
+**Nextcloud API Health**:
+```promql
+sum(rate(mcp_nextcloud_api_requests_total{status_code!~"2.."}[5m])) by (app)
+```
+
+## Alerts
+
+### Recommended Alert Rules
+
+**Critical**:
+- Server down for >5min
+- Error rate >5% for >5min
+- P95 latency >1s for >5min
+- Dependency down for >2min
+
+**Warning**:
+- Token validation errors >1% for >10min
+- Vector sync queue >100 for >15min
+- Qdrant slow (p95 >500ms) for >10min
+
+See `charts/nextcloud-mcp-server/templates/prometheusrule.yaml` for complete definitions.
+
+## Troubleshooting
+
+### Metrics Not Appearing
+
+1. Check metrics are enabled: `curl http://localhost:9090/metrics`
+2. Verify ServiceMonitor labels match Prometheus selector
+3. Check Prometheus target status: `http://prometheus:9090/targets`
+
+### Traces Not Appearing
+
+1. Verify OTLP endpoint is reachable: `curl http://otel-collector:4317`
+2. Check collector logs for errors
+3. Verify sampling rate is not 0.0
+4. Check trace backend (Jaeger/Tempo) connectivity
+
+### High Cardinality Metrics
+
+If you see cardinality warnings:
+- Middleware normalizes endpoints (e.g., `/user/123` → `/user/*`)
+- OAuth tokens are never included in metric labels
+- User IDs are not tracked (use tracing for per-user debugging)
+
+## Performance Impact
+
+- **Metrics**: <1% overhead (counters/histograms are very fast)
+- **Tracing**: ~2-5% overhead at 100% sampling
+- **JSON logging**: <1% overhead vs text logging
+
+**Recommendation**: Always enable metrics. Enable tracing in staging/production with 10-50% sampling.
+
+## Architecture
+
+The observability stack integrates at multiple layers:
+
+1. **HTTP Layer**: `ObservabilityMiddleware` tracks all HTTP requests
+2. **MCP Layer**: Tools use `@instrument_tool` for automatic metrics and trace span creation
+3. **Client Layer**: `BaseNextcloudClient` tracks all API calls
+4. **OAuth Layer**: Token operations are traced and metered
+5. **Background Tasks**: Vector sync operations emit metrics/traces
+
+All components use shared Prometheus `Registry` and OpenTelemetry `TracerProvider`.
+
+## References
+
+- [Prometheus Best Practices](https://prometheus.io/docs/practices/)
+- [OpenTelemetry Python SDK](https://opentelemetry.io/docs/languages/python/)
+- [Prometheus Operator](https://prometheus-operator.dev/)
+- [Grafana Dashboards](https://grafana.com/docs/grafana/latest/dashboards/)
@@ -0,0 +1,921 @@
+# Semantic Search Architecture
+
+This document explains the architecture of the semantic search feature in the Nextcloud MCP Server, including background synchronization, vector search, and optional AI-generated answers via MCP sampling.
+
+> [!IMPORTANT]
+> **Status: Experimental**
+> - Disabled by default (`VECTOR_SYNC_ENABLED=false`)
+> - Currently supports **Notes app only** (multi-app architecture ready, additional apps planned)
+> - Requires additional infrastructure (Qdrant vector database + Ollama embedding service)
+> - RAG answer generation requires MCP client sampling support
+
+## Overview
+
+### What is Semantic Search?
+
+**Semantic search** finds information based on **meaning** rather than exact keyword matches. It uses vector embeddings to understand that "car" and "automobile" are similar, or that "bread recipe" matches "how to bake bread."
+
+**Traditional keyword search:**
+```
+Query: "machine learning"
+Matches: Only notes containing "machine learning" exactly
+Misses: Notes with "neural networks", "AI models", "deep learning"
+```
+
+**Semantic search:**
+```
+Query: "machine learning"
+Matches: Notes about machine learning, neural networks, AI, deep learning, etc.
+Understanding: Semantic similarity via vector embeddings
+```
+
+### Why It Matters
+
+Semantic search enables:
+- **Natural language queries** - Ask questions in plain language
+- **Conceptual discovery** - Find related content even with different terminology
+- **Cross-reference insights** - Connect ideas across your knowledge base
+- **AI-powered answers** - Generate summaries with citations (optional, requires MCP sampling)
+
+### Current Support
+
+- **Supported Apps**: Notes (fully implemented)
+- **Planned Apps**: Calendar events, Calendar tasks, Deck cards, Files (with text extraction), Contacts
+- **Architecture**: Multi-app plugin system ready, awaiting implementation
+
+## System Components
+
+```mermaid
+graph TB
+    subgraph "MCP Client"
+        Client[Claude Desktop, IDEs, etc.]
+    end
+
+    subgraph "Nextcloud MCP Server"
+        MCP[MCP Server]
+        Scanner[Background Scanner<br/>Hourly Change Detection]
+        Queue[Document Queue]
+        Processor[Embedding Processors<br/>Concurrent Workers]
+    end
+
+    subgraph "Infrastructure"
+        Qdrant[(Qdrant<br/>Vector Database)]
+        Ollama[Ollama<br/>Embedding Service]
+        NC[Nextcloud<br/>Notes API, CalDAV, etc.]
+    end
+
+    Client <-->|MCP Protocol| MCP
+    Scanner -->|Fetch Changes| NC
+    Scanner -->|Enqueue Documents| Queue
+    Queue -->|Process Batch| Processor
+    Processor -->|Generate Embeddings| Ollama
+    Processor -->|Store Vectors| Qdrant
+    MCP -->|Search Queries| Qdrant
+    MCP -->|Verify Access| NC
+```
+
+**Component Roles:**
+
+- **MCP Server**: Exposes semantic search tools (`nc_semantic_search`, `nc_semantic_search_answer`, `nc_get_vector_sync_status`)
+- **Background Scanner**: Discovers changed documents every hour using ETag-based change detection
+- **Document Queue**: Holds pending documents for embedding generation
+- **Embedding Processors**: Generate vector embeddings via Ollama (concurrent workers)
+- **Qdrant Vector Database**: Stores document vectors with metadata and user_id filtering
+- **Ollama Embedding Service**: Converts text to 768-dimensional vectors (default: `nomic-embed-text` model)
+- **Nextcloud APIs**: Source of truth for documents and access control verification
+
+## How It Works: Background Synchronization
+
+Background synchronization runs automatically when `VECTOR_SYNC_ENABLED=true`, discovering changes and indexing documents without user intervention.
+
+```mermaid
+sequenceDiagram
+    participant Timer
+    participant Scanner
+    participant NC as Nextcloud API
+    participant Queue
+    participant Processor
+    participant Ollama
+    participant Qdrant
+
+    Timer->>Scanner: Trigger (hourly)
+    Scanner->>NC: Fetch all notes<br/>(Notes API)
+    NC-->>Scanner: Notes with ETags
+    Scanner->>Qdrant: Check indexed documents
+    Qdrant-->>Scanner: Existing ETags
+    Scanner->>Scanner: Identify changes<br/>(new/modified/deleted)
+    Scanner->>Queue: Enqueue changed docs
+
+    loop Continuous Processing
+        Processor->>Queue: Fetch batch
+        Queue-->>Processor: Documents
+        Processor->>Ollama: Generate embeddings
+        Ollama-->>Processor: 768-dim vectors
+        Processor->>Qdrant: Upsert vectors<br/>(with user_id, doc_type)
+    end
+```
+
+### Scanner Behavior
+
+**Hourly Trigger:**
+- Runs every hour (configurable)
+- Fetches all notes from Nextcloud Notes API
+- Compares ETags with Qdrant's indexed state
+- Enqueues new/modified documents
+
+**Change Detection:**
+- **New documents**: No entry in Qdrant → enqueue for indexing
+- **Modified documents**: ETag mismatch → enqueue for re-indexing
+- **Deleted documents**: In Qdrant but not in Nextcloud → delete from Qdrant
+
+**Multi-App Plugin Architecture:**
+```python
+# Each app implements DocumentScanner interface
+class NotesScanner(DocumentScanner):
+    async def scan(self) -> list[Document]:
+        # Fetch notes, detect changes, return documents
+```
+
+Currently only `NotesScanner` is implemented. Future: `CalendarScanner`, `DeckScanner`, `FilesScanner`, etc.
+
+### Queue Processing
+
+**Document Queue:**
+- In-memory FIFO queue (not persistent across restarts)
+- Holds documents pending embedding generation
+- Batch processing for efficiency
+
+**Processor Pool:**
+- Concurrent workers using `anyio.TaskGroup`
+- Process documents in parallel (default: 4 workers)
+- Each worker: fetch document → generate embedding → store in Qdrant
+
+**Backpressure Handling:**
+- Queue size limits prevent memory exhaustion
+- Slow consumers (Ollama) naturally pace the system
+
+### Vector Storage
+
+**Qdrant Collection Schema:**
+```
+{
+  "id": "note_123",
+  "vector": [768 dimensions],
+  "payload": {
+    "user_id": "alice",
+    "doc_type": "note",
+    "doc_id": "123",
+    "title": "Machine Learning Notes",
+    "content": "Neural networks are...",
+    "etag": "abc123",
+    "last_modified": "2025-01-15T10:30:00Z"
+  }
+}
+```
+
+**Key Fields:**
+- `user_id`: Multi-tenancy filtering (each user's vectors isolated)
+- `doc_type`: App identifier ("note", "event", "card", etc.)
+- `etag`: Change detection for incremental updates
+- `chunk_index`: Position of this chunk within the document (0-indexed)
+- `total_chunks`: Total number of chunks for this document
+- `excerpt`: First 200 characters of chunk (for display)
+
+### Document Chunking Strategy
+
+Documents are chunked before embedding to handle content larger than the embedding model's context window and to improve search precision.
+
+**Configuration:**
+```dotenv
+DOCUMENT_CHUNK_SIZE=512       # Words per chunk (default)
+DOCUMENT_CHUNK_OVERLAP=50     # Overlapping words between chunks (default)
+```
+
+**Chunking Process:**
+1. **Text combination**: Document title + content (e.g., `"Note Title\n\nNote content..."`)
+2. **Word-based splitting**: Simple whitespace tokenization
+3. **Sliding window**: Create overlapping chunks
+4. **Individual embedding**: Each chunk gets its own vector
+5. **Separate storage**: Each chunk stored as distinct point in Qdrant
+
+**Example:**
+```
+Document (1000 words):
+→ Chunk 0: words 0-511
+→ Chunk 1: words 462-973 (overlaps by 50 words)
+→ Chunk 2: words 924-999 (last chunk, partial)
+
+Each chunk stored as separate vector with metadata:
+- chunk_index: 0, 1, 2
+- total_chunks: 3
+- excerpt: First 200 chars of each chunk
+```
+
+**Search Behavior:**
+- **Vector search** operates on chunks (not whole documents)
+- **Deduplication** collapses multiple matching chunks from same document
+- **Best match** returns highest-scoring chunk's excerpt
+- **Access verification** still performed at document level
+
+**Tuning Recommendations:**
+- **Small chunks (256-384 words)**: More precise, less context, more storage
+- **Large chunks (768-1024 words)**: More context, less precise, less storage
+- **Overlap (10-20% of chunk size)**: Preserves context across boundaries
+- **Match to embedding model**: Consider model's context window when sizing
+
+**Important**: Changing chunk size requires re-embedding all documents. Use the collection naming strategy to manage different chunking configurations.
+
+### Collection Naming and Model Switching
+
+**Auto-generated collection names:**
+- **Format:** `{deployment-id}-{model-name}`
+- **Deployment ID:** `OTEL_SERVICE_NAME` (if configured) or `hostname` (fallback)
+- **Model name:** `OLLAMA_EMBEDDING_MODEL`
+- **Example:** `"my-mcp-server-nomic-embed-text"`, `"mcp-container-all-minilm"`
+
+**Why model-based naming:**
+- Ensures each embedding model gets its own collection
+- Prevents dimension mismatches when switching models
+- Enables safe model experimentation (new model = new collection)
+- Supports multi-server deployments (different deployment IDs)
+
+**Switching embedding models:**
+
+Collections are **mutually exclusive** - vectors from one embedding model cannot be used with another. When you change the embedding model:
+
+1. **New collection is created** with the new model's dimensions
+2. **Full re-embedding occurs** - scanner processes all documents again
+3. **Old collection remains** - can be deleted manually if no longer needed
+4. **Dimension validation** - server fails fast if collection dimension doesn't match model
+
+**Example workflow:**
+```bash
+# Start with nomic-embed-text (768 dimensions)
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+# Collection: "my-server-nomic-embed-text"
+# → Scanner indexes 1000 notes → 1000 vectors in collection
+
+# Switch to all-minilm (384 dimensions)
+OLLAMA_EMBEDDING_MODEL=all-minilm
+# Collection: "my-server-all-minilm"
+# → Scanner detects 0 indexed documents → re-embeds 1000 notes
+# → Old collection "my-server-nomic-embed-text" still exists in Qdrant
+```
+
+**Re-embedding performance:**
+- CPU-only: 1-5 notes/second
+- With GPU: 50-200 notes/second
+- 1000 notes: 3-16 minutes (CPU) or 5-20 seconds (GPU)
+
+**Multi-server deployments:**
+
+Multiple MCP servers can share one Qdrant instance safely:
+
+```bash
+# Server 1 (Production)
+OTEL_SERVICE_NAME=mcp-prod
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+# → Collection: "mcp-prod-nomic-embed-text"
+
+# Server 2 (Staging with different model)
+OTEL_SERVICE_NAME=mcp-staging
+OLLAMA_EMBEDDING_MODEL=all-minilm
+# → Collection: "mcp-staging-all-minilm"
+```
+
+Each deployment gets its own collection - no naming collisions or dimension conflicts.
+
+## How It Works: Semantic Search
+
+Semantic search converts user queries into vectors and finds similar documents using cosine similarity.
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant MCP as MCP Server
+    participant Ollama
+    participant Qdrant
+    participant NC as Nextcloud API
+
+    User->>MCP: nc_semantic_search("machine learning")
+    MCP->>MCP: Check OAuth scope<br/>(semantic:read)
+    MCP->>Ollama: Generate query embedding
+    Ollama-->>MCP: Query vector (768-dim)
+    MCP->>Qdrant: Search similar vectors<br/>(filter: user_id=alice)
+    Qdrant-->>MCP: Top K results<br/>(with similarity scores)
+
+    loop For each result
+        MCP->>NC: Verify access<br/>(fetch note by ID)
+        alt Access granted
+            NC-->>MCP: Note metadata
+        else Access denied (404/401)
+            MCP->>MCP: Filter out result
+        end
+    end
+
+    MCP-->>User: Search results<br/>(with scores, excerpts)
+```
+
+### Dual-Phase Authorization
+
+**Phase 1: OAuth Scope Check**
+- Verify user has `semantic:read` scope
+- Rejects unauthorized users immediately
+
+**Phase 2: Per-Document Verification**
+- For each search result, fetch document via app API (Notes, Calendar, etc.)
+- If fetch succeeds (200 OK), user has access
+- If fetch fails (404 Not Found, 401 Unauthorized), filter out result
+- **Security**: Prevents information leakage from vector search alone
+
+**Rationale:**
+- Vector database doesn't know about sharing, permissions changes, or deleted documents
+- App APIs are source of truth for access control
+- Verification ensures users only see documents they can access
+
+### Search Flow
+
+1. **Query Embedding**: Convert user query to 768-dimensional vector via Ollama
+2. **Vector Search**: Find top K similar vectors in Qdrant (cosine similarity)
+3. **User Filtering**: Qdrant pre-filters by `user_id` (multi-tenancy)
+4. **Access Verification**: Fetch each document via app API to verify current access
+5. **Result Ranking**: Return results sorted by similarity score
+6. **Response**: Include document excerpts, metadata, and similarity scores
+
+### Performance
+
+- **Query latency**: 50-200ms typical (embedding + vector search + verification)
+- **Accuracy**: Depends on embedding model quality (`nomic-embed-text` recommended)
+- **Scalability**: Qdrant handles millions of vectors efficiently
+
+## How It Works: RAG with MCP Sampling (Optional)
+
+The `nc_semantic_search_answer` tool generates AI-powered answers with citations using **MCP sampling** - requesting the MCP client's LLM to generate text.
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant MCP as MCP Server
+    participant Client as MCP Client<br/>(Claude Desktop)
+    participant LLM as Client's LLM<br/>(Claude, GPT, etc.)
+
+    User->>MCP: nc_semantic_search_answer("What are my Q1 goals?")
+    MCP->>MCP: Semantic search<br/>(find relevant notes)
+    MCP->>MCP: Construct prompt<br/>(query + documents + instructions)
+    MCP->>Client: Sampling request<br/>(MCP Protocol)
+    Client->>User: Prompt for approval<br/>(optional, client-controlled)
+    User-->>Client: Approve
+    Client->>LLM: Generate answer<br/>(with context)
+    LLM-->>Client: Answer with citations
+    Client-->>MCP: Sampling response
+    MCP-->>User: Generated answer<br/>(with source documents)
+```
+
+### MCP Sampling Architecture
+
+**Why MCP Sampling?**
+- **No server-side LLM**: MCP server has no API keys, doesn't call LLMs directly
+- **Client controls everything**: Which model, who pays, user approval prompts
+- **Privacy**: Documents stay with the client's LLM provider, not a third-party
+- **Flexibility**: Works with any MCP client that supports sampling (Claude Desktop, future clients)
+
+**Prompt Construction:**
+```
+User Query: {query}
+
+Relevant Documents:
+1. Document: {title} (Note)
+   Content: {excerpt}
+
+2. Document: {title} (Note)
+   Content: {excerpt}
+
+Instructions:
+- Provide a comprehensive answer to the user's query
+- Use the documents above as context
+- Include citations: "According to Document 1 (title)..."
+- If documents don't contain enough information, say so
+```
+
+**Graceful Fallback:**
+```python
+try:
+    result = await ctx.session.create_message(...)
+    return answer_with_citations
+except Exception as e:
+    # Fallback: Return documents without generated answer
+    return SearchResponse(
+        generated_answer=f"[Sampling unavailable: {e}]",
+        sources=search_results
+    )
+```
+
+**Client Support:**
+- **Requires**: MCP client with sampling capability
+- **Known support**: Claude Desktop (as of Claude 3.5+)
+- **Graceful degradation**: Returns raw documents if sampling unavailable
+
+## Authentication & Security
+
+### OAuth Scopes
+
+**`semantic:read`** - Search permission
+- Allows using `nc_semantic_search` and `nc_semantic_search_answer` tools
+- Does NOT grant access to documents (verified via app APIs)
+- Required for any semantic search operation
+
+**`semantic:write`** - Sync control permission
+- Allows enabling/disabling background sync (`provision_vector_sync`, `deprovision_vector_sync`)
+- Controls whether user's documents are indexed
+- Currently not implemented in OAuth mode (BasicAuth only)
+
+### Dual-Phase Authorization Pattern
+
+**Phase 1: Scope Check** (semantic:read)
+- Verifies user authorized to search
+- Prevents unauthorized vector database access
+
+**Phase 2: Document Verification** (app-specific APIs)
+- For each search result, fetch via Notes API, CalDAV, etc.
+- If user can fetch → include in results
+- If user cannot fetch (404/401) → filter out
+- **Security**: Vector search cannot leak documents user shouldn't see
+
+**Example Scenario:**
+1. Alice creates note "Secret Project X"
+2. Background sync indexes note with `user_id=alice`
+3. Bob searches for "project"
+4. Vector search finds "Secret Project X" (vector similarity)
+5. Qdrant filters by `user_id=bob` → no match (Alice's note excluded)
+6. Even if Bob somehow got the doc_id, Phase 2 verification would fail (404 Not Found)
+
+### Offline Access for Background Sync
+
+**Why needed:**
+- Background scanner runs hourly without user interaction
+- Requires valid access tokens to fetch documents from Nextcloud APIs
+- User's session token expires after hours/days
+
+**OAuth Mode (ADR-004 Flow 2):**
+- User explicitly provisions offline access via `provision_nextcloud_access` tool
+- Server requests `offline_access` scope → receives refresh token
+- Refresh token stored securely (database, encrypted)
+- Background sync uses refresh tokens to obtain access tokens
+
+**BasicAuth Mode:**
+- Username/password stored in environment variables
+- Always available for background operations
+- Simpler but less secure (credentials never expire)
+
+## Deployment Modes
+
+### Authentication Modes
+
+| Mode | Security | Offline Access | Background Sync | Best For |
+|------|----------|----------------|-----------------|----------|
+| **BasicAuth** | Lower (credentials in env) | Always available | ✅ Works immediately | Single-user, development, testing |
+| **OAuth** | Higher (tokens, scopes) | User must provision | ⚠️ Not yet implemented | Multi-user, production |
+
+**BasicAuth:**
+- Set `NEXTCLOUD_USERNAME` and `NEXTCLOUD_PASSWORD`
+- Background sync works immediately when `VECTOR_SYNC_ENABLED=true`
+- Credentials stored in `.env` file (secure server access required)
+
+**OAuth:**
+- Client authenticates with `semantic:read` scope
+- User must explicitly provision offline access (future: `provision_vector_sync` tool)
+- Background sync only works for users who provisioned access
+- More secure: tokens expire, user controls access
+
+### Qdrant Deployment Modes
+
+| Mode | Configuration | Persistence | Scalability | Best For |
+|------|---------------|-------------|-------------|----------|
+| **In-Memory** (default) | `QDRANT_LOCATION=:memory:` | ❌ Lost on restart | Single instance | Testing, development |
+| **Persistent Local** | `QDRANT_LOCATION=/data/qdrant` | ✅ Survives restarts | Single instance | Small deployments |
+| **Network** | `QDRANT_URL=http://qdrant:6333` | ✅ Dedicated service | ✅ Horizontal scaling | Production |
+
+**In-Memory Mode:**
+```bash
+VECTOR_SYNC_ENABLED=true
+# QDRANT_LOCATION not set → defaults to :memory:
+```
+- Fastest startup
+- No disk I/O
+- **Warning**: All vectors lost when server restarts (must re-index)
+
+**Persistent Local Mode:**
+```bash
+VECTOR_SYNC_ENABLED=true
+QDRANT_LOCATION=/var/lib/qdrant
+```
+- Vectors survive restarts
+- Single server only (no distributed setup)
+- Disk I/O for durability
+
+**Network Mode (Recommended for Production):**
+```bash
+VECTOR_SYNC_ENABLED=true
+QDRANT_URL=http://qdrant:6333
+QDRANT_API_KEY=secret  # optional
+```
+- Dedicated Qdrant service (Docker, Kubernetes)
+- Horizontal scaling (multiple MCP servers → one Qdrant)
+- High availability options
+
+### Embedding Service Options
+
+| Service | Configuration | Cost | Performance | Best For |
+|---------|---------------|------|-------------|----------|
+| **Ollama** (recommended) | `OLLAMA_BASE_URL=http://ollama:11434` | Free (self-hosted) | Fast (local GPU) | Production, development |
+| **OpenAI** (future) | `OPENAI_API_KEY=sk-...` | Paid (API) | Fast (cloud) | Cloud deployments |
+| **Fallback** | No config | Free | Slow (random) | Testing only (not production) |
+
+**Ollama Setup (Recommended):**
+```bash
+# docker-compose.yml
+services:
+  ollama:
+    image: ollama/ollama
+    volumes:
+      - ollama-data:/root/.ollama
+    ports:
+      - "11434:11434"
+
+# Pull embedding model
+docker compose exec ollama ollama pull nomic-embed-text
+```
+
+**Environment Configuration:**
+```bash
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # 768-dimensional vectors
+```
+
+**Model Options:**
+- `nomic-embed-text` (default): 768-dim, optimized for semantic search
+- `all-minilm`: Smaller, faster, slightly less accurate
+- `mxbai-embed-large`: Larger, more accurate, slower
+
+## Configuration Overview
+
+### Key Environment Variables
+
+**Enable Semantic Search:**
+```bash
+VECTOR_SYNC_ENABLED=true  # Default: false (opt-in)
+```
+
+**Qdrant Vector Database:**
+```bash
+# In-memory mode (default if VECTOR_SYNC_ENABLED=true)
+# QDRANT_LOCATION not set → uses :memory:
+
+# Persistent local mode
+QDRANT_LOCATION=/var/lib/qdrant
+
+# Network mode (production)
+QDRANT_URL=http://qdrant:6333
+QDRANT_API_KEY=secret  # optional
+```
+
+**Ollama Embedding Service:**
+```bash
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # Default
+```
+
+**Scanner Configuration:**
+```bash
+VECTOR_SYNC_INTERVAL=3600  # Scan interval in seconds (default: 1 hour)
+```
+
+### Resource Requirements
+
+**Qdrant:**
+- **Memory**: ~100-200 MB base + ~1 KB per vector (1M vectors ≈ 1 GB)
+- **Disk**: Persistent mode only, ~200 bytes per vector
+- **CPU**: Low (indexing) to moderate (search)
+
+**Ollama:**
+- **Memory**: 2-4 GB for `nomic-embed-text` model
+- **CPU**: High during embedding generation, idle otherwise
+- **GPU**: Optional but recommended (10-100x faster)
+
+**MCP Server:**
+- **Memory**: +50-100 MB for background sync workers
+- **CPU**: Moderate during scanning/processing, low otherwise
+
+### Trade-offs
+
+| Consideration | In-Memory Qdrant | Persistent Qdrant | Network Qdrant |
+|---------------|------------------|-------------------|----------------|
+| Setup complexity | ✅ Minimal | ✅ Easy | ⚠️ Requires separate service |
+| Durability | ❌ Lost on restart | ✅ Survives restarts | ✅ Survives restarts |
+| Scalability | ❌ Single instance | ❌ Single instance | ✅ Horizontal scaling |
+| Performance | ✅ Fastest | ✅ Fast | ⚠️ Network latency |
+
+## Operational Behavior
+
+### What Happens When VECTOR_SYNC_ENABLED=true
+
+**Immediate (Server Startup):**
+1. MCP server connects to Qdrant (creates collection if needed)
+2. MCP server connects to Ollama (verifies embedding model available)
+3. Background scanner starts (schedules hourly runs)
+4. Document queue and processors initialize
+
+**First Scan (Within 1 hour):**
+1. Scanner fetches all notes from Nextcloud
+2. Compares with Qdrant (likely empty on first run)
+3. Enqueues all notes for indexing
+4. Processors generate embeddings (may take minutes for large note collections)
+5. Vectors stored in Qdrant with user_id filtering
+
+**Hourly Thereafter:**
+1. Scanner fetches all notes
+2. Identifies new/modified/deleted notes (ETag comparison)
+3. Enqueues changes only
+4. Incremental updates processed
+
+### Performance Expectations
+
+**Embedding Generation:**
+- **Without GPU**: 1-5 notes/second (CPU-bound)
+- **With GPU**: 50-200 notes/second (highly parallel)
+- **Initial indexing**: 100 notes ≈ 20-100 seconds (CPU), 1-2 seconds (GPU)
+
+**Search Query:**
+- **Embedding generation**: 50-100ms
+- **Vector search**: 10-50ms (depends on collection size)
+- **Access verification**: 20-100ms per document (Nextcloud API calls)
+- **Total latency**: 100-300ms typical
+
+**Resource Usage:**
+- **Idle**: Minimal (background scanner sleeps)
+- **Scanning**: Moderate CPU (ETag checks, API calls)
+- **Processing**: High CPU/GPU (embedding generation)
+- **Searching**: Low to moderate (depends on query frequency)
+
+### Background Sync Behavior
+
+**Scanner Triggers:**
+- Hourly (configurable via `VECTOR_SYNC_INTERVAL`)
+- Manual trigger via `nc_trigger_vector_sync` (future)
+
+**Queue Processing:**
+- Continuous (workers always running)
+- Batch processing (fetch 10 documents at a time)
+- Concurrent workers (4 by default)
+
+**Error Handling:**
+- Individual document failures logged but don't stop scanning
+- Retries for transient errors (network timeouts, rate limits)
+- Failed documents skipped, re-attempted on next scan
+
+**What Gets Indexed:**
+- **Notes**: All notes accessible to the authenticated user
+- **Future**: Calendar events, tasks, deck cards, files with text extraction, contacts
+
+## Monitoring & Observability
+
+### MCP Tools
+
+**`nc_get_vector_sync_status`** - Check sync status
+```python
+{
+  "total_documents": 1234,
+  "indexed_documents": 1200,
+  "pending_documents": 34,
+  "sync_enabled": true,
+  "last_scan": "2025-01-15T14:30:00Z",
+  "status": "syncing"  # idle | syncing | error
+}
+```
+
+**Interpreting Status:**
+- `idle`: No pending work, last scan completed successfully
+- `syncing`: Currently processing documents
+- `error`: Last scan failed (check logs)
+
+### Logs to Check
+
+**Scanner Logs:**
+```
+[INFO] Vector sync scanner started (interval: 3600s)
+[INFO] Scanning notes: found 150 documents
+[INFO] Changes detected: 5 new, 2 modified, 1 deleted
+[INFO] Enqueued 7 documents for processing
+```
+
+**Processor Logs:**
+```
+[INFO] Processing document: note_123
+[DEBUG] Generated embedding (768 dimensions)
+[INFO] Stored vector in Qdrant: note_123
+```
+
+**Error Logs:**
+```
+[ERROR] Failed to generate embedding for note_123: Connection timeout
+[WARN] Qdrant connection lost, retrying...
+[ERROR] Ollama embedding failed: Model not found
+```
+
+**Log Locations:**
+- **Docker**: `docker compose logs mcp`
+- **Local**: stdout (redirect to file if needed)
+- **Kubernetes**: `kubectl logs -f deployment/nextcloud-mcp-server`
+
+### Metrics to Monitor
+
+**Indexing Progress:**
+- Total documents vs indexed documents
+- Pending queue size
+- Processing rate (docs/second)
+
+**Search Performance:**
+- Query latency (p50, p95, p99)
+- Results per query
+- Verification overhead (API calls per query)
+
+**Resource Usage:**
+- Qdrant memory/disk usage
+- Ollama CPU/GPU usage
+- MCP server memory
+
+For detailed observability setup, see [docs/observability.md](observability.md).
+
+## Troubleshooting from Architecture Perspective
+
+### Documents Not Appearing in Search
+
+**Diagnosis Flow:**
+1. Check sync status: `nc_get_vector_sync_status`
+   - `sync_enabled: false` → Enable with `VECTOR_SYNC_ENABLED=true`
+   - `status: error` → Check scanner logs for failures
+2. Check queue size:
+   - `pending_documents > 0` → Processing in progress, wait
+   - `pending_documents == 0` but `indexed_documents` low → Scan hasn't run yet (wait up to 1 hour)
+3. Check Qdrant:
+   - Connection errors in logs → Verify `QDRANT_URL` or `QDRANT_LOCATION`
+   - Collection empty → First scan hasn't completed
+4. Check Ollama:
+   - Embedding errors in logs → Verify `OLLAMA_BASE_URL`
+   - Model not found → Pull model: `ollama pull nomic-embed-text`
+
+**Common Causes:**
+- Sync disabled (default): Enable `VECTOR_SYNC_ENABLED=true`
+- Ollama not running: Start Ollama service
+- Qdrant not accessible: Check network/URL
+- First scan in progress: Wait up to 1 hour + processing time
+
+### Slow Search Performance
+
+**Diagnosis:**
+1. **Query embedding slow (>500ms)**:
+   - Ollama overloaded or CPU-bound
+   - Solution: Use GPU, upgrade CPU, or reduce concurrent requests
+2. **Vector search slow (>200ms)**:
+   - Large collection (millions of vectors)
+   - Solution: Use network Qdrant with SSDs, add indexing
+3. **Verification slow (>500ms)**:
+   - Many results to verify (10+ documents)
+   - Nextcloud API slow or overloaded
+   - Solution: Reduce `limit` parameter, optimize Nextcloud
+
+**Performance Tuning:**
+- Reduce search `limit` (default: 10 results)
+- Use network Qdrant for large collections
+- Enable Ollama GPU acceleration
+- Check Nextcloud API response times
+
+### Background Sync Stopped
+
+**Diagnosis:**
+1. Check logs for errors:
+   - Authentication failures (401/403) → Token expired (OAuth) or credentials invalid (BasicAuth)
+   - Connection timeouts → Network issues with Nextcloud/Qdrant/Ollama
+   - Rate limiting (429) → Reduce scan frequency
+2. Check `nc_get_vector_sync_status`:
+   - `status: error` → See logs for details
+   - `last_scan` timestamp old (>2 hours) → Scanner may have crashed
+3. Verify services:
+   - Qdrant accessible: `curl http://qdrant:6333/`
+   - Ollama accessible: `curl http://ollama:11434/api/tags`
+   - Nextcloud accessible: Check API health
+
+**OAuth Mode (Future):**
+- Offline access token expired → Re-provision via `provision_vector_sync`
+- User deprovisioned access → Sync stops intentionally
+
+### Out of Memory
+
+**Diagnosis:**
+1. Check Qdrant mode:
+   - In-memory mode with large collection → Switch to persistent or network mode
+2. Check embedding batch size:
+   - Too many documents processed simultaneously → Reduce worker count
+3. Check Ollama memory:
+   - Large models loaded → Use smaller embedding model
+
+**Solutions:**
+- Use persistent or network Qdrant (frees server memory)
+- Reduce concurrent processor workers
+- Use smaller embedding model (`all-minilm` instead of `nomic-embed-text`)
+- Increase server memory allocation
+
+## Limitations & Future Work
+
+### Current Limitations
+
+1. **Notes App Only**
+   - Architecture supports multiple apps (plugin system ready)
+   - Only `NotesScanner` and `NotesProcessor` implemented
+   - Future: Calendar, Deck, Files, Contacts
+
+2. **MCP Sampling Support**
+   - `nc_semantic_search_answer` requires client sampling capability
+   - Not all MCP clients support sampling yet
+   - Graceful fallback: Returns documents without generated answer
+
+3. **OAuth Background Sync**
+   - User-controlled background jobs not yet implemented
+   - Currently works in BasicAuth mode only
+   - Future: Users opt-in via `provision_vector_sync` tool
+
+4. **No Incremental Updates**
+   - Document changes trigger full re-embedding
+   - Cannot update just modified paragraphs
+   - Future: Paragraph-level chunking and incremental updates
+
+5. **No Query Caching**
+   - Each search generates new query embedding
+   - Repeated queries re-search Qdrant
+   - Future: Cache recent query embeddings and results
+
+6. **Single Embedding Model**
+   - Uses one model for all documents and queries
+   - Cannot customize per app or user
+   - Future: App-specific or user-selected models
+
+### Future Enhancements
+
+**Multi-App Support** (In Progress):
+- Scanner plugins for Calendar, Deck, Files, Contacts
+- Unified vector search across all apps
+- App-specific metadata in vector payloads
+
+**User-Controlled Sync (OAuth Mode)**:
+- `provision_vector_sync` and `deprovision_vector_sync` tools
+- Per-user background job scheduling
+- User dashboard for sync status and controls
+
+**Advanced Search Features**:
+- Hybrid search (vector + keyword combined)
+- Filtering by date range, app type, tags
+- Aggregations and faceted search
+- Search result explanations (why this matched)
+
+**Performance Optimizations**:
+- Query caching for repeated searches
+- Incremental document updates (paragraph-level)
+- Batch query processing
+- Qdrant HNSW indexing tuning
+
+**Embedding Improvements**:
+- Support for OpenAI embeddings (ada-002, text-embedding-3)
+- Multi-language embedding models
+- Fine-tuned models for Nextcloud content
+- Paragraph-level chunking for long documents
+
+## References
+
+### Architecture Decision Records (ADRs)
+
+- **[ADR-003: Vector Database Semantic Search](ADR-003-vector-database-semantic-search.md)** - Qdrant selection rationale, embedding strategy, hybrid search (superseded by ADR-007 but technical decisions remain valid)
+- **[ADR-007: Background Vector Sync Job Management](ADR-007-background-vector-sync-job-management.md)** - Current implementation, Scanner-Queue-Processor architecture, plugin system
+- **[ADR-008: MCP Sampling for Semantic Search](ADR-008-mcp-sampling-for-semantic-search.md)** - RAG with MCP sampling, client-server separation, prompt construction
+- **[ADR-009: Semantic Search OAuth Scope](ADR-009-semantic-search-oauth-scope.md)** - OAuth scope model, dual-phase authorization, security rationale
+
+### Configuration & Setup
+
+- **[Configuration Guide](configuration.md)** - Environment variables, Qdrant setup, Ollama setup, detailed configuration options
+- **[Installation Guide](installation.md)** - Deployment options (Docker, Kubernetes, local)
+- **[Running the Server](running.md)** - Starting the server, transport options, testing
+
+### Monitoring & Troubleshooting
+
+- **[Observability Guide](observability.md)** - Logging, metrics, tracing, debugging
+- **[Troubleshooting](troubleshooting.md)** - General issues and solutions
+
+### Related Documentation
+
+- **[OAuth Architecture](oauth-architecture.md)** - OAuth flows, scopes, token management
+- **[Comparison with Context Agent](comparison-context-agent.md)** - When to use Nextcloud MCP Server vs Context Agent
+
+---
+
+**Questions or Issues?**
+- [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues)
+- [Contribute improvements](https://github.com/cbcoutinho/nextcloud-mcp-server/pulls)
@@ -0,0 +1,93 @@
+# Vector Sync UI Guide
+
+This guide covers the browser-based interface for the Nextcloud MCP Server's semantic search and vector synchronization features.
+
+## Overview
+
+The Vector Sync UI (`/app`) provides an interactive interface to test semantic search queries and visualize results from your Nextcloud documents. It exposes the same retrieval capabilities that LLMs use in Retrieval-Augmented Generation (RAG) workflows, powered by Alpine.js for reactive state, htmx for dynamic updates, and Plotly.js for 3D visualization.
+
+**Supported Apps**: Notes, Files (text/PDF), Calendar (events/tasks), Contacts (CardDAV), and Deck are indexed and searchable.
+
+## Accessing the UI
+
+Navigate to `/app` after authentication:
+- **BasicAuth mode**: `http://localhost:8000/app` (uses credentials from environment)
+- **OAuth mode**: `http://localhost:8000/app` (redirects to login if not authenticated)
+
+## Tabs
+
+### Welcome Page
+
+Landing page that introduces semantic search and RAG workflows. Shows authentication status, explains how vector embeddings work, and provides feature navigation. Adapts content based on whether `VECTOR_SYNC_ENABLED=true`.
+
+### User Info
+
+Displays authentication details and session information:
+- **BasicAuth**: Username, mode badge, Nextcloud host
+- **OAuth**: Username, session ID (truncated), background access status, IdP profile, revocation option
+
+### Vector Sync Status
+
+Real-time monitoring of document indexing:
+- **Indexed Documents**: Total chunks stored in Qdrant vector database (immediately searchable)
+- **Pending Documents**: Queue awaiting embedding processing
+- **Status**: "✓ Idle" (green) when up-to-date, "⟳ Syncing" (orange) during processing
+
+Auto-refreshes every 10 seconds via htmx. Check this tab after adding content to verify indexing completion.
+
+### Vector Visualization
+
+Interactive search interface with 3D PCA plot of semantic space.
+
+**Search Controls**:
+- **Query**: Natural language search (e.g., "health benefits of coffee")
+- **Algorithm**: Semantic (Dense) for pure vector search, or BM25 Hybrid (default) combining vectors + keywords
+- **Fusion** (Hybrid only): RRF (Reciprocal Rank Fusion) or DBSF (Distribution-Based Score Fusion)
+- **Advanced**: Filter by document type, adjust score threshold (0.0-1.0), set result limit (max 100)
+
+**3D Visualization**:
+
+The plot uses Principal Component Analysis (PCA) to reduce 768-dimensional embeddings to 3D. Documents are positioned by semantic similarity with the query point shown in red. Point size and opacity indicate relevance, and the Viridis color scale shows relative scores (yellow = highest match).
+
+**Critical Fix**: Vectors are L2-normalized before PCA to match Qdrant's cosine distance, ensuring query points position accurately near similar documents. Without normalization, magnitude differences cause misleading spatial separation.
+
+**Results List**:
+
+Each result shows document title (clickable link to Nextcloud), excerpt, raw score, relative percentage, and document type. Click "Show Chunk" to view the matched text segment with surrounding context (up to 500 characters before/after).
+
+## Configuration
+
+**Required**:
+```bash
+VECTOR_SYNC_ENABLED=true
+```
+
+**Optional** (for browser-accessible links):
+```bash
+NEXTCLOUD_PUBLIC_ISSUER_URL=https://your-public-nextcloud-url.com
+```
+
+**Admin Access**: Webhooks tab only visible to Nextcloud admins (verified via Provisioning API).
+
+## Use Cases
+
+**Testing Search Queries**: Preview results before they reach LLMs in RAG workflows. Compare semantic vs. hybrid algorithms, verify relevance scores, and validate that correct documents are retrieved. Use chunk context to see exactly which text segments match and why unexpected documents appear.
+
+**Monitoring Indexing**: Track real-time progress after creating or modifying documents. Check if the queue is backing up (high pending count) or confirm the system is idle after bulk imports. Verify documents become searchable immediately after indexing completes.
+
+**Algorithm Comparison**: Pure semantic search excels at conceptual queries and synonyms. BM25 hybrid combines semantic understanding with precise keyword matching for better accuracy on specific terms. Experiment with RRF vs. DBSF fusion for different score distributions.
+
+## Troubleshooting
+
+**Vector Sync Tab Not Visible**: Set `VECTOR_SYNC_ENABLED=true` and restart the server.
+
+**No Search Results**: Check Vector Sync Status to confirm documents are indexed (not just pending). Try broader queries or lower the score threshold in Advanced options. Initial indexing may take time depending on document volume.
+
+**Links to Nextcloud Apps Not Working**: Set `NEXTCLOUD_PUBLIC_ISSUER_URL` to your browser-accessible Nextcloud URL for correct link generation.
+
+## Related Documentation
+
+- [Configuration Guide](../configuration.md) - Environment variables and settings
+- [Authentication Modes](../authentication.md) - BasicAuth vs OAuth setup
+- [Installation Guide](../installation.md) - Getting started
+- [ADR-008: MCP Sampling for RAG](../ADR-008-mcp-sampling-for-rag.md) - Technical details on RAG workflows
@@ -124,3 +124,75 @@ ENABLE_CUSTOM_PROCESSOR=false

 # Comma-separated MIME types your processor supports
 #CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg,image/png
+
+# ============================================
+# Semantic Search & Vector Sync Configuration
+# ============================================
+# EXPERIMENTAL: Semantic search for Notes app (multi-app support planned)
+# Requires: Qdrant vector database + Ollama embedding service
+# Disabled by default
+
+# Enable background vector indexing
+VECTOR_SYNC_ENABLED=false
+
+# Document scan interval in seconds (default: 300 = 5 minutes)
+# How often to check for new/updated documents
+#VECTOR_SYNC_SCAN_INTERVAL=300
+
+# Concurrent indexing workers (default: 3)
+# Number of parallel workers for embedding generation
+#VECTOR_SYNC_PROCESSOR_WORKERS=3
+
+# Max queued documents (default: 10000)
+# Maximum documents waiting to be processed
+#VECTOR_SYNC_QUEUE_MAX_SIZE=10000
+
+# ============================================
+# Qdrant Vector Database Configuration
+# ============================================
+# Choose ONE of three modes:
+# 1. In-memory mode (default): Set neither QDRANT_URL nor QDRANT_LOCATION
+# 2. Persistent local: Set QDRANT_LOCATION=/path/to/data
+# 3. Network mode: Set QDRANT_URL=http://qdrant:6333
+
+# Network mode: URL to Qdrant service
+#QDRANT_URL=http://qdrant:6333
+
+# Local mode: Path to store vectors (use :memory: for in-memory)
+#QDRANT_LOCATION=:memory:
+
+# API key for network mode (optional)
+#QDRANT_API_KEY=
+
+# Collection name (optional - auto-generated if not set)
+# Auto-generation format: {deployment-id}-{model-name}
+# Allows safe model switching and multi-server deployments
+#QDRANT_COLLECTION=nextcloud_content
+
+# ============================================
+# Ollama Embedding Service Configuration
+# ============================================
+# Ollama endpoint for embeddings (if not set, uses SimpleEmbeddingProvider fallback)
+#OLLAMA_BASE_URL=http://ollama:11434
+
+# Embedding model to use (default: nomic-embed-text, 768 dimensions)
+# Changing this creates a new collection (requires re-embedding all documents)
+#OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Verify SSL certificates (default: true)
+#OLLAMA_VERIFY_SSL=true
+
+# ============================================
+# Document Chunking Configuration
+# ============================================
+# Configure how documents are split before embedding
+
+# Words per chunk (default: 512)
+# Smaller chunks (256-384): More precise, less context, more storage
+# Larger chunks (768-1024): More context, less precise, less storage
+#DOCUMENT_CHUNK_SIZE=512
+
+# Overlapping words between chunks (default: 50)
+# Recommended: 10-20% of chunk size
+# Preserves context across chunk boundaries
+#DOCUMENT_CHUNK_OVERLAP=50
@@ -751,6 +751,40 @@
        "display.on.consent.screen": "true",
        "consent.screen.text": "Create, update, and delete tasks"
      }
+    },
+    {
+      "name": "default-audience",
+      "protocol": "openid-connect",
+      "attributes": {
+        "include.in.token.scope": "false",
+        "display.on.consent.screen": "false",
+        "gui.order": "",
+        "consent.screen.text": ""
+      },
+      "protocolMappers": [
+        {
+          "name": "mcp-server-audience",
+          "protocol": "openid-connect",
+          "protocolMapper": "oidc-audience-mapper",
+          "consentRequired": false,
+          "config": {
+            "included.client.audience": "nextcloud-mcp-server",
+            "access.token.claim": "true",
+            "id.token.claim": "false"
+          }
+        },
+        {
+          "name": "mcp-url-audience",
+          "protocol": "openid-connect",
+          "protocolMapper": "oidc-audience-mapper",
+          "consentRequired": false,
+          "config": {
+            "included.custom.audience": "http://localhost:8002",
+            "access.token.claim": "true",
+            "id.token.claim": "false"
+          }
+        }
+      ]
    }
  ],
  "components": {
@@ -791,7 +825,8 @@
    "profile",
    "email",
    "roles",
-    "web-origins"
+    "web-origins",
+    "default-audience"
  ],
  "defaultOptionalClientScopes": [
    "offline_access",
@@ -14,11 +14,11 @@ from .scope_authorization import (
    is_jwt_token,
    require_scopes,
 )
-from .token_verifier import NextcloudTokenVerifier
+from .unified_verifier import UnifiedTokenVerifier

 __all__ = [
    "BearerAuth",
-    "NextcloudTokenVerifier",
+    "UnifiedTokenVerifier",
    "register_client",
    "ensure_oauth_client",
    "get_client_from_context",
@@ -1,12 +1,14 @@
 """Browser-based OAuth login routes for admin UI.

 Separate from MCP OAuth flow - these routes establish browser sessions
-for accessing admin UI endpoints like /user/page.
+for accessing admin UI endpoints like /app.
 """

+import hashlib
 import logging
 import os
 import secrets
+from base64 import urlsafe_b64encode
 from urllib.parse import urlencode

 import httpx
@@ -36,8 +38,8 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
    """
    oauth_ctx = request.app.state.oauth_context
    if not oauth_ctx:
-        # BasicAuth mode - no login needed, redirect to user page
-        return RedirectResponse("/user/page", status_code=302)
+        # BasicAuth mode - no login needed, redirect to app
+        return RedirectResponse("/app", status_code=302)

    storage = oauth_ctx["storage"]
    oauth_client = oauth_ctx["oauth_client"]
@@ -53,39 +55,36 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:

    # Build OAuth authorization URL
    mcp_server_url = oauth_config["mcp_server_url"]
-    callback_uri = f"{mcp_server_url}/oauth/login-callback"
+    callback_uri = f"{mcp_server_url}/oauth/callback"

    # Request only basic OIDC scopes for browser session
    # Note: Nextcloud app scopes (notes:read, etc.) are for MCP client access tokens,
    # not for the MCP server's own browser authentication
    scopes = "openid profile email offline_access"

-    code_challenge = ""
-    code_verifier = ""
+    # Generate PKCE values for ALL modes (both external and integrated IdP require PKCE)
+    code_verifier = secrets.token_urlsafe(32)
+    digest = hashlib.sha256(code_verifier.encode()).digest()
+    code_challenge = urlsafe_b64encode(digest).decode().rstrip("=")
+
+    # Store code_verifier in session for retrieval during callback (using state as key)
+    await storage.store_oauth_session(
+        session_id=state,  # Use state as session ID
+        client_id="browser-ui",
+        client_redirect_uri="/app",
+        state=state,
+        code_challenge=code_challenge,
+        code_challenge_method="S256",
+        mcp_authorization_code=code_verifier,  # Store code_verifier here temporarily
+        flow_type="browser",
+        ttl_seconds=600,  # 10 minutes
+    )

    if oauth_client:
        # External IdP mode (Keycloak)
-        # Keycloak requires PKCE, so generate code_verifier and code_challenge
        if not oauth_client.authorization_endpoint:
            await oauth_client.discover()

-        # Generate PKCE values
-        code_verifier, code_challenge = oauth_client.generate_pkce_challenge()
-
-        # Store code_verifier temporarily (using state as key)
-        # We'll retrieve it in the callback using the state parameter
-        await storage.store_oauth_session(
-            session_id=state,  # Use state as session ID
-            client_id="browser-ui",
-            client_redirect_uri="/user/page",
-            state=state,
-            code_challenge=code_challenge,
-            code_challenge_method="S256",
-            mcp_authorization_code=code_verifier,  # Store code_verifier here temporarily
-            flow_type="browser",
-            ttl_seconds=600,  # 10 minutes
-        )
-
        idp_params = {
            "client_id": oauth_client.client_id,
            "redirect_uri": callback_uri,
@@ -138,6 +137,8 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
            "response_type": "code",
            "scope": scopes,
            "state": state,
+            "code_challenge": code_challenge,
+            "code_challenge_method": "S256",
            "prompt": "consent",  # Ensure refresh token
        }

@@ -213,20 +214,18 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
    oauth_client = oauth_ctx["oauth_client"]
    oauth_config = oauth_ctx["config"]

-    # Retrieve code_verifier from session storage (if using PKCE)
+    # Retrieve code_verifier from session storage (PKCE required for all modes)
    code_verifier = ""
-    if oauth_client:
-        # For Keycloak (external IdP), we stored the code_verifier in the session
-        oauth_session = await storage.get_oauth_session(state)
-        if oauth_session:
-            # code_verifier was stored in mcp_authorization_code field
-            code_verifier = oauth_session.get("mcp_authorization_code", "")
-            # Clean up the temporary session
-            # Note: We don't have delete_oauth_session method, but it will expire after TTL
+    oauth_session = await storage.get_oauth_session(state)
+    if oauth_session:
+        # code_verifier was stored in mcp_authorization_code field
+        code_verifier = oauth_session.get("mcp_authorization_code", "")
+        # Clean up the temporary session
+        # Note: We don't have delete_oauth_session method, but it will expire after TTL

    # Exchange authorization code for tokens
    mcp_server_url = oauth_config["mcp_server_url"]
-    callback_uri = f"{mcp_server_url}/oauth/login-callback"
+    callback_uri = f"{mcp_server_url}/oauth/callback"

    try:
        if oauth_client:
@@ -263,16 +262,22 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
                discovery = response.json()
                token_endpoint = discovery["token_endpoint"]

+            token_params = {
+                "grant_type": "authorization_code",
+                "code": code,
+                "redirect_uri": callback_uri,
+                "client_id": oauth_config["client_id"],
+                "client_secret": oauth_config["client_secret"],
+            }
+
+            # Add code_verifier for PKCE (required by Nextcloud OIDC)
+            if code_verifier:
+                token_params["code_verifier"] = code_verifier
+
            async with httpx.AsyncClient() as http_client:
                response = await http_client.post(
                    token_endpoint,
-                    data={
-                        "grant_type": "authorization_code",
-                        "code": code,
-                        "redirect_uri": callback_uri,
-                        "client_id": oauth_config["client_id"],
-                        "client_secret": oauth_config["client_secret"],
-                    },
+                    data=token_params,
                )
                response.raise_for_status()
                token_data = response.json()
@@ -336,13 +341,18 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
    # Store refresh token (for background jobs ONLY)
    if refresh_token:
        logger.info(f"Storing refresh token for user_id: {user_id}")
+        logger.info(f"  State parameter (provisioning_client_id): {state[:16]}...")
        await storage.store_refresh_token(
            user_id=user_id,
            refresh_token=refresh_token,
            expires_at=None,
            flow_type="browser",  # Browser-based login flow
+            provisioning_client_id=state,  # Store state for unified session lookup
        )
        logger.info(f"✓ Refresh token stored successfully for user_id: {user_id}")
+        logger.info(
+            f"  Token can now be found via provisioning_client_id={state[:16]}..."
+        )
    else:
        logger.warning("No refresh token in token response - cannot store session")

@@ -373,7 +383,7 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
            # Continue anyway - profile cache is optional for browser UI

    # Create response and set session cookie
-    response = RedirectResponse("/user/page", status_code=302)
+    response = RedirectResponse("/app", status_code=302)
    response.set_cookie(
        key="mcp_session",
        value=user_id,
@@ -8,7 +8,7 @@ from typing import Any
 import anyio
 import httpx

-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -79,19 +79,22 @@ async def register_client(
    client_name: str = "Nextcloud MCP Server",
    redirect_uris: list[str] | None = None,
    scopes: str = "openid profile email",
-    token_type: str = "Bearer",
+    token_type: str | None = "Bearer",
    resource_url: str | None = None,
 ) -> ClientInfo:
    """
-    Register a new OAuth client with Nextcloud OIDC using dynamic client registration.
+    Register a new OAuth client using RFC 7591 Dynamic Client Registration.
+
+    This function supports both Nextcloud OIDC and standard OIDC providers like Keycloak.

    Args:
-        nextcloud_url: Base URL of the Nextcloud instance
+        nextcloud_url: Base URL of the OIDC provider
        registration_endpoint: Full URL to the registration endpoint
        client_name: Name of the client application
        redirect_uris: List of redirect URIs (default: http://localhost:8000/oauth/callback)
        scopes: Space-separated list of scopes to request
-        token_type: Type of access tokens to issue (default: "Bearer", also supports "JWT")
+        token_type: Type of access tokens (default: "Bearer", supports "JWT" for Nextcloud).
+                    Set to None to omit this field (required for Keycloak and other standard providers).
        resource_url: OAuth 2.0 Protected Resource URL (RFC 9728) - used for token introspection authorization

    Returns:
@@ -100,6 +103,11 @@ async def register_client(
    Raises:
        httpx.HTTPStatusError: If registration fails
        ValueError: If response is invalid
+
+    Note:
+        The token_type parameter is a Nextcloud-specific extension and is not part of RFC 7591.
+        Standard OIDC providers like Keycloak do not accept this field and will return a 400 error
+        if it's included. Set token_type=None when registering with Keycloak or other standard providers.
    """
    if redirect_uris is None:
        redirect_uris = ["http://localhost:8000/oauth/callback"]
@@ -111,9 +119,12 @@ async def register_client(
        "grant_types": ["authorization_code", "refresh_token"],
        "response_types": ["code"],
        "scope": scopes,
-        "token_type": token_type,
    }

+    # Add token_type if provided (Nextcloud-specific, not RFC 7591 standard)
+    if token_type is not None:
+        client_metadata["token_type"] = token_type
+
    # Add resource_url if provided (RFC 9728)
    if resource_url:
        client_metadata["resource_url"] = resource_url
@@ -1,45 +1,55 @@
-"""Helper functions for extracting OAuth context from MCP requests."""
+"""Helper functions for extracting OAuth context from MCP requests.

+ADR-005 compliant implementation with token exchange caching.
+"""
+
+import hashlib
 import logging
+import time

 from mcp.server.auth.provider import AccessToken
 from mcp.server.fastmcp import Context

 from ..client import NextcloudClient
 from ..config import get_settings
+from ..observability.metrics import (
+    oauth_token_cache_hits_total,
+    oauth_token_exchange_total,
+)
 from .token_exchange import exchange_token_for_audience

 logger = logging.getLogger(__name__)

+# Token exchange cache: token_hash -> (exchanged_token, expiry_timestamp)
+_exchange_cache: dict[str, tuple[str, float]] = {}
+

 def get_client_from_context(ctx: Context, base_url: str) -> NextcloudClient:
    """
-    Extract authenticated user context from MCP request and create NextcloudClient.
+    Create NextcloudClient for multi-audience mode (no exchange needed).

-    This function retrieves the OAuth access token from the MCP context,
-    extracts the username from the token's resource field (where we stored it
-    during token verification), and creates a NextcloudClient with bearer auth.
+    ADR-005 Mode 1: Use multi-audience tokens directly.
+    The UnifiedTokenVerifier validated MCP audience per RFC 7519.
+    Nextcloud will independently validate its own audience.

    Args:
        ctx: MCP request context containing session info
        base_url: Nextcloud base URL

    Returns:
-        NextcloudClient configured with bearer token auth
+        NextcloudClient configured with multi-audience token

    Raises:
        AttributeError: If context doesn't contain expected OAuth session data
        ValueError: If username cannot be extracted from token
    """
    try:
-        # In Starlette with FastMCP OAuth, the authenticated user info is stored in request.user
-        # The FastMCP auth middleware sets request.user to an AuthenticatedUser object
-        # which contains the access_token
+        # Extract validated access token from MCP context
        if hasattr(ctx.request_context.request, "user") and hasattr(
            ctx.request_context.request.user, "access_token"
        ):
            access_token: AccessToken = ctx.request_context.request.user.access_token
-            logger.debug("Retrieved access token from request.user for OAuth request")
+            logger.debug("Retrieved multi-audience token from request.user")
        else:
            logger.error(
                "OAuth authentication failed: No access token found in request"
@@ -47,16 +57,20 @@ def get_client_from_context(ctx: Context, base_url: str) -> NextcloudClient:
            raise AttributeError("No access token found in OAuth request context")

        # Extract username from resource field (RFC 8707)
-        # We stored the username here during token verification
+        # UnifiedTokenVerifier stored the username here during validation
        username = access_token.resource

        if not username:
            logger.error("No username found in access token resource field")
            raise ValueError("Username not available in OAuth token context")

-        logger.debug(f"Creating OAuth NextcloudClient for user: {username}")
+        logger.debug(
+            f"Creating NextcloudClient for user {username} with multi-audience token "
+            f"(no exchange needed)"
+        )

-        # Create client with bearer token
+        # Token was validated to have MCP audience
+        # Nextcloud will validate its own audience independently
        return NextcloudClient.from_token(
            base_url=base_url, token=access_token.token, username=username
        )
@@ -71,12 +85,19 @@ async def get_session_client_from_context(
    ctx: Context, base_url: str
 ) -> NextcloudClient:
    """
-    Create NextcloudClient using RFC 8693 token exchange for session operations.
+    Create NextcloudClient using RFC 8693 token exchange with caching.
+
+    ADR-005 Mode 2: Exchange MCP token for Nextcloud token via RFC 8693.

    This implements the token exchange pattern where:
-    1. Extract Flow 1 token from context (aud: "mcp-server")
-    2. Exchange it for ephemeral Nextcloud token via RFC 8693
-    3. Create client with delegated token (NOT stored)
+    1. Extract MCP token from context (validated by UnifiedTokenVerifier)
+    2. Check cache for existing exchanged token
+    3. If not cached or expired, exchange via RFC 8693
+    4. Cache the exchanged token to minimize exchange frequency
+    5. Create client with exchanged token
+
+    CRITICAL: This is where token exchange happens, NOT in the verifier.
+    The verifier already validated the MCP audience; now we exchange for Nextcloud.

    Note: Nextcloud doesn't support OAuth scopes natively. Scopes are enforced
    by the MCP server via @require_scopes decorator, not by the IdP. Therefore,
@@ -88,7 +109,7 @@ async def get_session_client_from_context(
        base_url: Nextcloud base URL

    Returns:
-        NextcloudClient configured with ephemeral delegated token
+        NextcloudClient configured with ephemeral exchanged token

    Raises:
        AttributeError: If context doesn't contain expected OAuth session data
@@ -96,43 +117,68 @@ async def get_session_client_from_context(
    """
    settings = get_settings()

-    # Check if token exchange is enabled
-    if not settings.enable_token_exchange:
-        logger.info("Token exchange disabled, falling back to standard OAuth flow")
-        return get_client_from_context(ctx, base_url)
-
    try:
-        # Extract Flow 1 token from context
+        # Extract MCP token from context
        if hasattr(ctx.request_context.request, "user") and hasattr(
            ctx.request_context.request.user, "access_token"
        ):
            access_token: AccessToken = ctx.request_context.request.user.access_token
-            flow1_token = access_token.token
-            username = access_token.resource  # Username stored during verification
-            logger.debug(f"Retrieved Flow 1 token for user: {username}")
+            mcp_token = access_token.token
+            username = access_token.resource  # Username from UnifiedTokenVerifier
+            logger.debug(f"Retrieved MCP token for user: {username}")
        else:
-            logger.error("No Flow 1 token found in request context")
+            logger.error("No MCP token found in request context")
            raise AttributeError("No access token found in OAuth request context")

        if not username:
            logger.error("No username found in access token resource field")
            raise ValueError("Username not available in OAuth token context")

-        logger.info("Exchanging client token for Nextcloud API token (pure RFC 8693)")
+        # Check cache for existing exchanged token
+        cache_key = hashlib.sha256(mcp_token.encode()).hexdigest()
+        if cache_key in _exchange_cache:
+            cached_token, expiry = _exchange_cache[cache_key]
+            if time.time() < expiry:
+                logger.debug(
+                    f"Using cached exchanged token (expires in {expiry - time.time():.1f}s)"
+                )
+                oauth_token_cache_hits_total.labels(hit="true").inc()
+                return NextcloudClient.from_token(
+                    base_url=base_url, token=cached_token, username=username
+                )
+            else:
+                logger.debug("Cached token expired, removing from cache")
+                del _exchange_cache[cache_key]

-        # Perform pure RFC 8693 token exchange (no refresh tokens)
-        # Note: We don't pass scopes since Nextcloud doesn't enforce them.
-        # The MCP server's @require_scopes decorator handles authorization.
-        exchanged_token, expires_in = await exchange_token_for_audience(
-            subject_token=flow1_token,
-            requested_audience="nextcloud",
-            requested_scopes=None,  # Nextcloud doesn't support scopes
-        )
+        oauth_token_cache_hits_total.labels(hit="false").inc()

-        logger.info(f"Pure token exchange successful. Token expires in {expires_in}s")
+        # Perform RFC 8693 token exchange
+        logger.info(f"Exchanging MCP token for Nextcloud API token (user: {username})")
+
+        try:
+            # Exchange for Nextcloud resource URI audience
+            exchanged_token, expires_in = await exchange_token_for_audience(
+                subject_token=mcp_token,
+                requested_audience=settings.nextcloud_resource_uri or "nextcloud",
+                requested_scopes=None,  # Nextcloud doesn't support scopes
+            )
+            oauth_token_exchange_total.labels(status="success").inc()
+
+            logger.info(f"Token exchange successful. Token expires in {expires_in}s")
+        except Exception:
+            oauth_token_exchange_total.labels(status="error").inc()
+            raise
+
+        # Cache the exchanged token
+        # Use the minimum of exchange TTL and configured cache TTL
+        cache_ttl = min(expires_in, settings.token_exchange_cache_ttl)
+        _exchange_cache[cache_key] = (exchanged_token, time.time() + cache_ttl)
+        logger.debug(f"Cached exchanged token for {cache_ttl}s")
+
+        # Clean up expired cache entries
+        _cleanup_exchange_cache()

        # Create client with exchanged token
-        # This token is ephemeral (per-request) and NOT stored
        return NextcloudClient.from_token(
            base_url=base_url, token=exchanged_token, username=username
        )
@@ -143,3 +189,21 @@ async def get_session_client_from_context(
    except Exception as e:
        logger.error(f"Token exchange failed: {e}")
        raise RuntimeError(f"Token exchange required but failed: {e}") from e
+
+
+def _cleanup_exchange_cache():
+    """Remove expired entries from the token exchange cache."""
+    global _exchange_cache
+    now = time.time()
+    expired_keys = [k for k, (_, expiry) in _exchange_cache.items() if expiry <= now]
+    for key in expired_keys:
+        del _exchange_cache[key]
+    if expired_keys:
+        logger.debug(f"Cleaned up {len(expired_keys)} expired cache entries")
+
+
+def clear_exchange_cache():
+    """Clear the entire token exchange cache. Useful for testing."""
+    global _exchange_cache
+    _exchange_cache.clear()
+    logger.debug("Token exchange cache cleared")
@@ -90,6 +90,8 @@ class KeycloakOAuthClient:
            )

        # Parse server URL to construct redirect URI
+        # Note: This is for OAuth client initialization, not used for actual redirects
+        # since this client is used for backend token operations (exchange, refresh)
        parsed_url = urlparse(server_url)
        redirect_uri = f"{parsed_url.scheme}://{parsed_url.netloc}/oauth/callback"

@@ -1,7 +1,7 @@
 """
-OAuth 2.0 Login Routes for ADR-004 Progressive Consent Architecture
+OAuth 2.0 Login Routes for ADR-004 (Offline Access Architecture)

-Implements dual OAuth flows with explicit provisioning:
+Implements dual OAuth flows with optional offline access provisioning:

 Flow 1: Client Authentication - MCP client authenticates directly to IdP
 - Client requests: Nextcloud MCP resource scopes (notes:*, calendar:*, etc.)
@@ -19,8 +19,11 @@ Flow 2: Resource Provisioning - MCP server gets delegated Nextcloud access

 """

+import hashlib
 import logging
 import os
+import secrets
+from base64 import urlsafe_b64encode
 from urllib.parse import urlencode

 import httpx
@@ -29,7 +32,7 @@ from starlette.requests import Request
 from starlette.responses import JSONResponse, RedirectResponse

 from nextcloud_mcp_server.auth.client_registry import get_client_registry
-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -118,7 +121,7 @@ async def oauth_authorize(request: Request) -> RedirectResponse | JSONResponse:
            status_code=400,
        )

-    # Validate client_id (required for Progressive Consent Flow 1)
+    # Validate client_id (required for Flow 1)
    if not client_id:
        return JSONResponse(
            {
@@ -168,7 +171,7 @@ async def oauth_authorize(request: Request) -> RedirectResponse | JSONResponse:
    # The MCP server does NOT see the IdP authorization code!

    logger.info(
-        f"Starting Progressive Consent Flow 1 - no server session needed, "
+        f"Starting Flow 1 - no server session needed, "
        f"client will handle IdP response directly at {redirect_uri}"
    )

@@ -188,7 +191,7 @@ async def oauth_authorize(request: Request) -> RedirectResponse | JSONResponse:
    # Use client's own client_id (client must be pre-registered at IdP)
    idp_client_id = client_id

-    logger.info("Flow 1 (Progressive Consent): Direct client auth to IdP")
+    logger.info("Flow 1: Direct client auth to IdP")
    logger.info(f"  Client ID: {client_id}")
    logger.info(f"  Client will receive IdP code directly at: {callback_uri}")
    logger.info(f"  Scopes: {scopes} (resource access for MCP tools)")
@@ -252,6 +255,7 @@ async def oauth_authorize(request: Request) -> RedirectResponse | JSONResponse:
            "scope": scopes,
            "state": idp_state,
            "prompt": "consent",  # Ensure refresh token
+            "resource": f"{oauth_config['mcp_server_url']}/mcp",  # MCP server audience
        }

        auth_url = f"{authorization_endpoint}?{urlencode(idp_params)}"
@@ -313,12 +317,31 @@ async def oauth_authorize_nextcloud(
        )

    mcp_server_url = oauth_config["mcp_server_url"]
-    callback_uri = f"{mcp_server_url}/oauth/callback-nextcloud"
+    callback_uri = f"{mcp_server_url}/oauth/callback"

    # Flow 2: Server only needs identity + offline access (no resource scopes)
    # Resource scopes are requested by client in Flow 1
    scopes = "openid profile email offline_access"

+    # Generate PKCE values (required by Nextcloud OIDC)
+    code_verifier = secrets.token_urlsafe(32)
+    digest = hashlib.sha256(code_verifier.encode()).digest()
+    code_challenge = urlsafe_b64encode(digest).decode().rstrip("=")
+
+    # Store code_verifier in session for retrieval during callback
+    storage = oauth_ctx["storage"]
+    await storage.store_oauth_session(
+        session_id=state,
+        client_id=mcp_server_client_id,
+        client_redirect_uri=callback_uri,
+        state=state,
+        code_challenge=code_challenge,
+        code_challenge_method="S256",
+        mcp_authorization_code=code_verifier,  # Store code_verifier here temporarily
+        flow_type="flow2",
+        ttl_seconds=600,  # 10 minutes
+    )
+
    # Get authorization endpoint
    discovery_url = oauth_config.get("discovery_url")
    if not discovery_url:
@@ -357,8 +380,11 @@ async def oauth_authorize_nextcloud(
        "response_type": "code",
        "scope": scopes,
        "state": state,
+        "code_challenge": code_challenge,
+        "code_challenge_method": "S256",
        "prompt": "consent",  # Force consent to show resource access
        "access_type": "offline",  # Request refresh token
+        "resource": oauth_config["nextcloud_resource_uri"],  # Nextcloud audience
    }

    auth_url = f"{authorization_endpoint}?{urlencode(idp_params)}"
@@ -414,6 +440,16 @@ async def oauth_callback_nextcloud(request: Request):
    storage: RefreshTokenStorage = oauth_ctx["storage"]
    oauth_config = oauth_ctx["config"]

+    # Retrieve code_verifier from session storage (PKCE required by Nextcloud OIDC)
+    code_verifier = ""
+    oauth_session = await storage.get_oauth_session(state)
+    if oauth_session:
+        # code_verifier was stored in mcp_authorization_code field
+        code_verifier = oauth_session.get("mcp_authorization_code", "")
+        logger.info(
+            f"Retrieved code_verifier for Flow 2 callback (state={state[:16]}...)"
+        )
+
    # Exchange code for tokens
    mcp_server_client_id = os.getenv(
        "MCP_SERVER_CLIENT_ID", oauth_config.get("client_id")
@@ -422,7 +458,7 @@ async def oauth_callback_nextcloud(request: Request):
        "MCP_SERVER_CLIENT_SECRET", oauth_config.get("client_secret")
    )
    mcp_server_url = oauth_config["mcp_server_url"]
-    callback_uri = f"{mcp_server_url}/oauth/callback-nextcloud"
+    callback_uri = f"{mcp_server_url}/oauth/callback"

    discovery_url = oauth_config.get("discovery_url")
    async with httpx.AsyncClient() as http_client:
@@ -431,17 +467,24 @@ async def oauth_callback_nextcloud(request: Request):
        discovery = response.json()
        token_endpoint = discovery["token_endpoint"]

+    # Build token exchange params
+    token_params = {
+        "grant_type": "authorization_code",
+        "code": code,
+        "redirect_uri": callback_uri,
+        "client_id": mcp_server_client_id,
+        "client_secret": mcp_server_client_secret,
+    }
+
+    # Add code_verifier for PKCE (required by Nextcloud OIDC)
+    if code_verifier:
+        token_params["code_verifier"] = code_verifier
+
    # Exchange code for tokens
    async with httpx.AsyncClient() as http_client:
        response = await http_client.post(
            token_endpoint,
-            data={
-                "grant_type": "authorization_code",
-                "code": code,
-                "redirect_uri": callback_uri,
-                "client_id": mcp_server_client_id,
-                "client_secret": mcp_server_client_secret,
-            },
+            data=token_params,
        )
        response.raise_for_status()
        token_data = response.json()
@@ -450,14 +493,22 @@ async def oauth_callback_nextcloud(request: Request):
    id_token = token_data.get("id_token")

    # Decode ID token to get user info
+    logger.info("=" * 60)
+    logger.info("oauth_callback_nextcloud: Extracting user_id from ID token")
+    logger.info("=" * 60)
    try:
        userinfo = jwt.decode(id_token, options={"verify_signature": False})
        user_id = userinfo.get("sub")
        username = userinfo.get("preferred_username") or userinfo.get("email")
+        logger.info("  ✓ ID token decode SUCCESSFUL")
+        logger.info(f"  Extracted user_id: {user_id}")
+        logger.info(f"  Username: {username}")
+        logger.info(f"  ID token payload keys: {list(userinfo.keys())}")
        logger.info(f"Flow 2: User {username} provisioned resource access")
    except Exception as e:
-        logger.warning(f"Failed to decode ID token: {e}")
+        logger.error(f"  ✗ ID token decode FAILED: {type(e).__name__}: {e}")
        user_id = "unknown"
+        logger.error(f"  Using fallback user_id: {user_id}")

    # Store master refresh token for Flow 2
    if refresh_token:
@@ -466,6 +517,13 @@ async def oauth_callback_nextcloud(request: Request):
            token_data.get("scope", "").split() if token_data.get("scope") else None
        )

+        logger.info("Storing refresh token:")
+        logger.info(f"  user_id: {user_id}")
+        logger.info("  flow_type: flow2")
+        logger.info("  token_audience: nextcloud")
+        logger.info(f"  provisioning_client_id: {state[:16]}...")
+        logger.info(f"  scopes: {granted_scopes}")
+
        await storage.store_refresh_token(
            user_id=user_id,
            refresh_token=refresh_token,
@@ -475,7 +533,8 @@ async def oauth_callback_nextcloud(request: Request):
            scopes=granted_scopes,
            expires_at=None,  # Refresh tokens typically don't expire
        )
-        logger.info(f"Stored Flow 2 master refresh token for user {user_id}")
+        logger.info(f"✓ Stored Flow 2 master refresh token for user {user_id}")
+        logger.info("=" * 60)

    # Return success HTML page
    success_html = """
@@ -500,3 +559,82 @@ async def oauth_callback_nextcloud(request: Request):
    from starlette.responses import HTMLResponse

    return HTMLResponse(content=success_html, status_code=200)
+
+
+async def oauth_callback(request: Request):
+    """
+    Unified OAuth callback endpoint supporting multiple flows.
+
+    This endpoint consolidates all OAuth callback handling into a single URL.
+    The flow type is determined by looking up the OAuth session using the
+    state parameter.
+
+    This simplifies IdP configuration by requiring only one callback URL
+    to be registered: /oauth/callback
+
+    Query parameters:
+        code: Authorization code from IdP
+        state: CSRF protection state (also used to lookup flow type)
+        error: Error code (if authorization failed)
+
+    Returns:
+        Response from the appropriate flow handler
+    """
+    # Get state parameter to lookup OAuth session
+    state = request.query_params.get("state")
+    if not state:
+        logger.warning("Unified callback called without state parameter")
+        return JSONResponse(
+            {
+                "error": "invalid_request",
+                "error_description": "state parameter is required",
+            },
+            status_code=400,
+        )
+
+    # Lookup OAuth session to determine flow type
+    oauth_ctx = request.app.state.oauth_context
+    if not oauth_ctx:
+        logger.error("OAuth context not available")
+        return JSONResponse(
+            {
+                "error": "server_error",
+                "error_description": "OAuth not configured on server",
+            },
+            status_code=500,
+        )
+
+    storage = oauth_ctx["storage"]
+    oauth_session = await storage.get_oauth_session(state)
+
+    # Determine flow type from session, default to "browser" for backwards compatibility
+    flow_type = (
+        oauth_session.get("flow_type", "browser") if oauth_session else "browser"
+    )
+
+    logger.info(f"Unified callback: flow_type={flow_type} (from session lookup)")
+
+    if flow_type == "flow2":
+        # Flow 2: Resource Provisioning - MCP server gets delegated Nextcloud access
+        logger.info("Routing to Flow 2 (resource provisioning)")
+        return await oauth_callback_nextcloud(request)
+
+    elif flow_type == "browser":
+        # Browser UI Login - establish browser session for /user/page access
+        logger.info("Routing to browser login flow")
+        from nextcloud_mcp_server.auth.browser_oauth_routes import (
+            oauth_login_callback,
+        )
+
+        return await oauth_login_callback(request)
+
+    else:
+        # Unknown flow type
+        logger.warning(f"Unknown flow_type in OAuth session: {flow_type}")
+        return JSONResponse(
+            {
+                "error": "invalid_request",
+                "error_description": f"Unknown flow type: {flow_type}",
+            },
+            status_code=400,
+        )
@@ -0,0 +1,54 @@
+"""Permission checking utilities for Nextcloud admin operations."""
+
+import logging
+
+from httpx import AsyncClient
+from starlette.requests import Request
+
+from nextcloud_mcp_server.client.users import UsersClient
+
+logger = logging.getLogger(__name__)
+
+
+async def is_nextcloud_admin(request: Request, http_client: AsyncClient) -> bool:
+    """Check if the authenticated user is a Nextcloud administrator.
+
+    This function extracts the username from the session/request context
+    and checks if the user is a member of the "admin" group in Nextcloud.
+
+    Args:
+        request: Starlette request object with authenticated user
+        http_client: Authenticated HTTP client for Nextcloud API calls
+
+    Returns:
+        True if user is admin, False otherwise
+
+    Example:
+        ```python
+        if await is_nextcloud_admin(request, http_client):
+            # Show admin-only features
+            pass
+        ```
+    """
+    try:
+        # Extract username from authenticated session
+        username = request.user.display_name
+        if not username:
+            logger.warning("No username found in authenticated session")
+            return False
+
+        # Query Nextcloud for user's group memberships
+        users_client = UsersClient(http_client, username)
+        user_groups = await users_client.get_user_groups(username)
+
+        # Check if user is in the admin group
+        is_admin = "admin" in user_groups
+        logger.debug(
+            f"Admin check for user '{username}': {is_admin} (groups: {user_groups})"
+        )
+
+        return is_admin
+
+    except Exception as e:
+        logger.error(f"Error checking admin permissions: {e}", exc_info=True)
+        return False
@@ -1,366 +0,0 @@
-"""
-Token Verifier for ADR-004 Progressive Consent Architecture.
-
-This module implements token verification with strict audience separation:
- Flow 1 tokens have aud: <mcp-client-id> for MCP authentication
- Flow 2 tokens have aud: "nextcloud" for resource access
- Token Broker manages the exchange between audiences
-"""
-
-import logging
-import os
-from datetime import datetime, timezone
-from typing import Optional
-
-import httpx
-import jwt
-from mcp.server.auth.provider import AccessToken
-
-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
-from nextcloud_mcp_server.auth.token_broker import TokenBrokerService
-
-logger = logging.getLogger(__name__)
-
-
-class ProgressiveConsentTokenVerifier:
-    """
-    Token verifier for Progressive Consent dual OAuth flows.
-
-    This verifier:
-    1. Validates Flow 1 tokens (aud: <mcp-client-id>) for MCP authentication
-    2. Checks if user has provisioned Nextcloud access (Flow 2)
-    3. Uses Token Broker to obtain aud: "nextcloud" tokens when needed
-    """
-
-    def __init__(
-        self,
-        token_storage: RefreshTokenStorage | None,
-        token_broker: Optional[TokenBrokerService] = None,
-        oidc_discovery_url: Optional[str] = None,
-        nextcloud_host: Optional[str] = None,
-        encryption_key: Optional[str] = None,
-        mcp_client_id: Optional[str] = None,
-        introspection_uri: Optional[str] = None,
-        client_secret: Optional[str] = None,
-    ):
-        """
-        Initialize the Progressive Consent token verifier.
-
-        Args:
-            token_storage: Storage for refresh tokens
-            token_broker: Token broker service (created if not provided)
-            oidc_discovery_url: OIDC provider discovery URL
-            nextcloud_host: Nextcloud server URL
-            encryption_key: Fernet key for token encryption
-            mcp_client_id: MCP server OAuth client ID for audience validation
-            introspection_uri: OAuth introspection endpoint URL (for opaque tokens)
-            client_secret: OAuth client secret (required for introspection)
-        """
-        self.storage = token_storage
-        self.oidc_discovery_url = oidc_discovery_url or os.getenv(
-            "OIDC_DISCOVERY_URL",
-            f"{os.getenv('NEXTCLOUD_HOST')}/.well-known/openid-configuration",
-        )
-        self.nextcloud_host = nextcloud_host or os.getenv("NEXTCLOUD_HOST")
-        self.encryption_key = encryption_key or os.getenv("TOKEN_ENCRYPTION_KEY")
-        self.mcp_client_id = mcp_client_id or os.getenv("OIDC_CLIENT_ID")
-        self.introspection_uri = introspection_uri
-        self.client_secret = client_secret or os.getenv("OIDC_CLIENT_SECRET")
-
-        # HTTP client for introspection requests
-        self._http_client: Optional[httpx.AsyncClient] = None
-        if self.introspection_uri and self.mcp_client_id and self.client_secret:
-            self._http_client = httpx.AsyncClient(timeout=10.0)
-            logger.info(f"Introspection support enabled: {introspection_uri}")
-        elif self.introspection_uri:
-            logger.warning(
-                "Introspection URI provided but missing client credentials - introspection disabled"
-            )
-
-        # Create token broker if not provided
-        if token_broker:
-            self.token_broker = token_broker
-        elif self.encryption_key and token_storage and self.nextcloud_host:
-            self.token_broker = TokenBrokerService(
-                storage=token_storage,
-                oidc_discovery_url=self.oidc_discovery_url,
-                nextcloud_host=self.nextcloud_host,
-                encryption_key=self.encryption_key,
-            )
-        else:
-            self.token_broker = None
-            if not self.encryption_key:
-                logger.warning("Token broker not available - encryption key missing")
-            elif not token_storage:
-                logger.warning("Token broker not available - token storage missing")
-            elif not self.nextcloud_host:
-                logger.warning("Token broker not available - nextcloud host missing")
-
-    async def verify_token(self, token: str) -> Optional[AccessToken]:
-        """
-        Verify a Flow 1 token (aud: <mcp-client-id>).
-
-        This validates that:
-        1. Token has correct audience for MCP server (matches client ID)
-        2. Token is not expired
-        3. Token has valid signature (if verification enabled)
-
-        Supports both JWT and opaque tokens:
-        - JWT tokens: Decoded directly from payload
-        - Opaque tokens: Validated via introspection endpoint (RFC 7662)
-
-        Args:
-            token: Access token from Flow 1 (JWT or opaque)
-
-        Returns:
-            AccessToken if valid, None otherwise
-        """
-        logger.info("🔐 verify_token called - attempting to validate token")
-        logger.info(f"Token (first 50 chars): {token[:50]}...")
-        logger.info(f"Expected MCP client ID: {self.mcp_client_id}")
-
-        # Check if token is JWT format (has 3 parts separated by dots)
-        is_jwt = "." in token and token.count(".") == 2
-        logger.info(f"Token format: {'JWT' if is_jwt else 'opaque'}")
-
-        if is_jwt:
-            # Try JWT verification
-            return await self._verify_jwt_token(token)
-        else:
-            # Fall back to introspection for opaque tokens
-            return await self._verify_opaque_token(token)
-
-    async def _verify_jwt_token(self, token: str) -> Optional[AccessToken]:
-        """Verify JWT token by decoding payload."""
-        try:
-            # Decode without signature verification (IdP handles that)
-            # In production, would verify signature with IdP public key
-            payload = jwt.decode(token, options={"verify_signature": False})
-            logger.info(f"Token payload decoded: {payload}")
-
-            # CRITICAL: Verify audience is for MCP server (Flow 1)
-            audiences = payload.get("aud", [])
-            if isinstance(audiences, str):
-                audiences = [audiences]
-
-            # Audience validation:
-            # - Accept tokens with no audience (will validate via introspection if needed)
-            # - Accept tokens with MCP client ID in audience (Keycloak multi-audience)
-            # - Accept tokens with resource URL in audience (Nextcloud JWT redirect URI)
-            # - Reject tokens with "nextcloud" audience only (wrong flow)
-            if audiences:
-                # Check if MCP client ID is in the audience (Keycloak multi-audience)
-                if self.mcp_client_id in audiences:
-                    logger.debug(
-                        f"Token has audience {audiences} - MCP client ID present"
-                    )
-                # Check if this is a Nextcloud-only token (wrong flow)
-                elif audiences == ["nextcloud"]:
-                    logger.warning(
-                        f"Token rejected: Nextcloud-only audience {audiences}"
-                    )
-                    logger.error(
-                        "Received Nextcloud token in MCP context - "
-                        "client may be using wrong token"
-                    )
-                    return None
-                # Otherwise accept (likely resource URL audience from Nextcloud JWT)
-                else:
-                    logger.info(
-                        f"Token has audience {audiences} (resource URL or non-standard) - accepting"
-                    )
-            else:
-                logger.info(
-                    "Token has no audience claim - accepting for MCP server validation"
-                )
-
-            # Check expiry
-            exp = payload.get("exp", 0)
-            if exp < datetime.now(timezone.utc).timestamp():
-                logger.warning(
-                    f"❌ Token expired: exp={exp}, now={datetime.now(timezone.utc).timestamp()}"
-                )
-                return None
-
-            # Extract user info
-            user_id = payload.get("sub", "unknown")
-            client_id = payload.get("client_id", "unknown")
-            scopes = payload.get("scope", "").split()
-            exp = payload.get("exp", None)
-
-            logger.info(
-                f"✅ Token validation successful! user={user_id}, scopes={scopes}"
-            )
-
-            # Create AccessToken for MCP framework
-            return AccessToken(
-                token=token,
-                client_id=client_id,
-                scopes=scopes,
-                expires_at=exp,
-                resource=user_id,  # Store user_id in resource field (RFC 8707)
-            )
-
-        except jwt.InvalidTokenError as e:
-            logger.warning(f"❌ Invalid token (JWT decode failed): {e}")
-            return None
-        except Exception as e:
-            logger.error(f"❌ Token verification failed with exception: {e}")
-            return None
-
-    async def _verify_opaque_token(self, token: str) -> Optional[AccessToken]:
-        """
-        Verify opaque token via introspection endpoint (RFC 7662).
-
-        Args:
-            token: Opaque access token
-
-        Returns:
-            AccessToken if active and valid, None otherwise
-        """
-        if not self._http_client or not self.introspection_uri:
-            logger.error(
-                "❌ Cannot verify opaque token - introspection not configured. "
-                "Set introspection_uri and client credentials."
-            )
-            return None
-
-        try:
-            logger.info(f"Introspecting token at {self.introspection_uri}")
-
-            # Call introspection endpoint (requires client authentication)
-            response = await self._http_client.post(
-                self.introspection_uri,
-                data={"token": token},
-                auth=(self.mcp_client_id, self.client_secret),
-            )
-
-            if response.status_code != 200:
-                logger.warning(
-                    f"❌ Introspection failed: HTTP {response.status_code} - {response.text[:200]}"
-                )
-                return None
-
-            introspection_data = response.json()
-            logger.info(f"Introspection response: {introspection_data}")
-
-            # Check if token is active
-            if not introspection_data.get("active", False):
-                logger.warning("❌ Token introspection returned active=false")
-                return None
-
-            # Extract user info
-            user_id = introspection_data.get("sub") or introspection_data.get(
-                "username"
-            )
-            if not user_id:
-                logger.error("❌ No username found in introspection response")
-                return None
-
-            # Extract scopes (space-separated string)
-            scope_string = introspection_data.get("scope", "")
-            scopes = scope_string.split() if scope_string else []
-
-            # Extract client ID and expiration
-            client_id = introspection_data.get("client_id", "unknown")
-            exp = introspection_data.get("exp")
-
-            logger.info(f"✅ Opaque token validated! user={user_id}, scopes={scopes}")
-
-            return AccessToken(
-                token=token,
-                client_id=client_id,
-                scopes=scopes,
-                expires_at=int(exp) if exp else None,
-                resource=user_id,
-            )
-
-        except httpx.TimeoutException:
-            logger.error("❌ Timeout while introspecting token")
-            return None
-        except httpx.RequestError as e:
-            logger.error(f"❌ Network error during introspection: {e}")
-            return None
-        except Exception as e:
-            logger.error(f"❌ Introspection failed with exception: {e}")
-            return None
-
-    async def check_provisioning(self, user_id: str) -> bool:
-        """
-        Check if user has provisioned Nextcloud access (Flow 2).
-
-        Args:
-            user_id: User identifier from Flow 1 token
-
-        Returns:
-            True if user has completed Flow 2, False otherwise
-        """
-        if not self.storage:
-            return False
-
-        refresh_data = await self.storage.get_refresh_token(user_id)
-        return refresh_data is not None
-
-    async def get_nextcloud_token(self, user_id: str) -> Optional[str]:
-        """
-        Get a Nextcloud access token (aud: "nextcloud") for the user.
-
-        This uses the Token Broker to:
-        1. Check for cached Nextcloud token
-        2. If expired, refresh using stored master refresh token
-        3. Return token with aud: "nextcloud" for API access
-
-        Args:
-            user_id: User identifier from Flow 1 token
-
-        Returns:
-            Nextcloud access token if provisioned, None otherwise
-        """
-        if not self.token_broker:
-            logger.error("Token broker not available")
-            return None
-
-        # Check if user has provisioned access
-        if not await self.check_provisioning(user_id):
-            logger.info(f"User {user_id} has not provisioned Nextcloud access")
-            return None
-
-        # Get or refresh Nextcloud token
-        try:
-            nextcloud_token = await self.token_broker.get_nextcloud_token(user_id)
-            if nextcloud_token:
-                logger.debug(f"Obtained Nextcloud token for user {user_id}")
-            return nextcloud_token
-        except Exception as e:
-            logger.error(f"Failed to get Nextcloud token: {e}")
-            return None
-
-    async def validate_scopes(
-        self, token: AccessToken, required_scopes: list[str]
-    ) -> bool:
-        """
-        Validate that token has required scopes.
-
-        Args:
-            token: The access token
-            required_scopes: List of required scopes
-
-        Returns:
-            True if all required scopes present, False otherwise
-        """
-        token_scopes = set(token.scopes) if token.scopes else set()
-        required = set(required_scopes)
-
-        missing = required - token_scopes
-        if missing:
-            logger.debug(f"Token missing required scopes: {missing}")
-            return False
-
-        return True
-
-    async def close(self):
-        """Clean up resources."""
-        if self.token_broker:
-            await self.token_broker.close()
-        if self._http_client:
-            await self._http_client.aclose()
@@ -1,8 +1,8 @@
 """
-Provisioning decorator for ADR-004 Progressive Consent Architecture.
+Provisioning decorator for ADR-004 (Offline Access Architecture).

 This decorator ensures users have completed Flow 2 (Resource Provisioning)
-before accessing Nextcloud resources.
+before accessing Nextcloud resources when offline access is enabled.
 """

 import functools
@@ -13,7 +13,7 @@ from mcp.server.fastmcp import Context
 from mcp.shared.exceptions import McpError
 from mcp.types import ErrorData

-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -73,7 +73,7 @@ def require_provisioning(func: Callable) -> Callable:
            logger.debug("Token exchange mode detected - skipping provisioning check")
            return await func(*args, **kwargs)

-        # Progressive Consent mode (offline access) - check if user has completed Flow 2 provisioning
+        # Offline access mode - check if user has completed Flow 2 provisioning
        # Get user_id from authorization token
        user_id = None
        if hasattr(ctx, "authorization") and ctx.authorization:
@@ -130,13 +130,13 @@ def require_scopes(*required_scopes: str):
            token_scopes = set(access_token.scopes or [])
            required_scopes_set = set(required_scopes)

-            # Check if Progressive Consent is enabled
-            enable_progressive = (
-                os.getenv("ENABLE_PROGRESSIVE_CONSENT", "false").lower() == "true"
+            # Check if offline access is enabled
+            enable_offline_access = (
+                os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() == "true"
            )

-            # In Progressive Consent mode, check if Nextcloud scopes require provisioning
-            if enable_progressive:
+            # In offline access mode, check if Nextcloud scopes require provisioning
+            if enable_offline_access:
                # Check if any required scopes are Nextcloud-specific
                nextcloud_scopes = [
                    s
@@ -0,0 +1,219 @@
+.viz-layout {
+    display: flex;
+    flex-direction: column;
+    gap: 16px;
+    height: 100%;
+    min-height: 0;
+    overflow-y: auto;
+}
+.viz-card {
+    background: var(--color-main-background);
+    border-radius: 0;
+    padding: 16px;
+    box-shadow: none;
+}
+.viz-controls-card {
+    flex: 0 0 auto;
+    border-bottom: 1px solid var(--color-border);
+    padding-bottom: 16px;
+}
+.viz-controls-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+    gap: 12px;
+    align-items: end;
+}
+@media (min-width: 768px) {
+    .viz-controls-grid {
+        grid-template-columns: 2fr 1.5fr 1.5fr auto auto;
+    }
+}
+.viz-control-group {
+    display: flex;
+    flex-direction: column;
+    gap: 4px;
+}
+.viz-control-group label {
+    font-weight: 500;
+    color: var(--color-main-text);
+    font-size: 13px;
+}
+.viz-control-group input[type="text"],
+.viz-control-group input[type="number"],
+.viz-control-group select {
+    width: 100%;
+    padding: 7px 10px;
+    border: 1px solid var(--color-border-dark);
+    border-radius: var(--border-radius);
+    font-size: 14px;
+    background: var(--color-main-background);
+    color: var(--color-main-text);
+}
+.viz-control-group input:focus,
+.viz-control-group select:focus {
+    outline: none;
+    border-color: var(--color-primary-element);
+}
+.viz-control-group input[type="range"] {
+    width: 100%;
+}
+.viz-control-group select[multiple] {
+    min-height: 100px;
+}
+.viz-weight-display {
+    display: inline-block;
+    min-width: 40px;
+    text-align: right;
+    color: #666;
+}
+.viz-btn {
+    background: var(--color-primary-element);
+    color: white;
+    border: none;
+    padding: 7px 16px;
+    border-radius: var(--border-radius);
+    cursor: pointer;
+    font-size: 14px;
+    font-weight: 500;
+    white-space: nowrap;
+}
+.viz-btn:hover {
+    background: #0052a3;
+}
+.viz-btn-secondary {
+    background: #6c757d;
+    color: white;
+    border: none;
+    padding: 7px 16px;
+    border-radius: var(--border-radius);
+    cursor: pointer;
+    font-size: 14px;
+    white-space: nowrap;
+}
+.viz-btn-secondary:hover {
+    background: #5a6268;
+}
+.viz-card-plot {
+    flex: 0 0 auto;
+    display: flex;
+    flex-direction: column;
+    min-height: 500px;
+    height: 600px;
+    /* Remove horizontal padding to extend to full viewport width */
+    padding-left: 0;
+    padding-right: 0;
+    margin-left: -16px;
+    margin-right: -16px;
+}
+#viz-plot-container {
+    width: 100%;
+    height: 100%;
+    position: relative;
+    overflow: visible;
+}
+#viz-plot {
+    width: 100%;
+    height: 100%;
+}
+.viz-loading {
+    text-align: center;
+    padding: 40px;
+    color: #666;
+}
+.viz-loading-overlay {
+    position: absolute;
+    inset: 0;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: white;
+    color: #666;
+}
+.viz-no-results {
+    text-align: center;
+    padding: 40px;
+    color: #666;
+    font-style: italic;
+}
+.viz-advanced-section {
+    margin-top: 12px;
+    padding: 12px;
+    background: var(--color-background-hover);
+    border-radius: var(--border-radius);
+    border: 1px solid var(--color-border);
+}
+.viz-info-box {
+    background: var(--color-primary-element-light);
+    border-left: 3px solid var(--color-primary-element);
+    padding: 10px 12px;
+    margin-bottom: 16px;
+    font-size: 13px;
+    color: var(--color-main-text);
+}
+.chunk-toggle-btn {
+    background: #6c757d;
+    color: white;
+    border: none;
+    padding: 4px 10px;
+    border-radius: 3px;
+    cursor: pointer;
+    font-size: 12px;
+    margin-top: 6px;
+}
+.chunk-toggle-btn:hover {
+    background: #5a6268;
+}
+.chunk-context {
+    background: var(--color-background-hover);
+    border: 1px solid var(--color-border);
+    border-radius: var(--border-radius);
+    padding: 12px;
+    margin-top: 8px;
+    font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace;
+    font-size: 13px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+.chunk-text {
+    color: var(--color-text-maxcontrast);
+}
+.chunk-matched {
+    background: #fff3cd;
+    border: 1px solid #ffc107;
+    padding: 2px 4px;
+    border-radius: var(--border-radius);
+    font-weight: 500;
+    color: var(--color-main-text);
+}
+.chunk-ellipsis {
+    color: var(--color-text-maxcontrast);
+    font-style: italic;
+}
+
+/* PDF highlighted image styles */
+.chunk-image-container {
+    margin-bottom: 16px;
+    border: 1px solid var(--color-border);
+    border-radius: var(--border-radius);
+    overflow: hidden;
+    background: #fff;
+}
+.chunk-image-header {
+    background: var(--color-background-dark);
+    padding: 8px 12px;
+    font-size: 12px;
+    font-weight: 500;
+    color: var(--color-text-maxcontrast);
+    border-bottom: 1px solid var(--color-border);
+    font-family: var(--font-face);
+}
+.chunk-highlighted-image {
+    display: block;
+    max-width: 100%;
+    height: auto;
+    cursor: zoom-in;
+}
+.chunk-highlighted-image:hover {
+    opacity: 0.95;
+}
@@ -0,0 +1,253 @@
+// Initialize vizApp for vector visualization
+function vizApp() {
+    return {
+        query: '',
+        algorithm: 'bm25_hybrid',
+        fusion: 'rrf',
+        showAdvanced: false,
+        showQueryPoint: true,
+        docTypes: [''],
+        limit: 50,
+        scoreThreshold: 0.0,
+        loading: false,
+        results: [],
+        coordinates: null,
+        queryCoords: null,
+        expandedChunks: {},
+        chunkLoading: {},
+
+        init() {
+            // Set up window resize listener to resize plot
+            window.addEventListener('resize', () => {
+                if (this.coordinates && this.results.length > 0) {
+                    Plotly.Plots.resize('viz-plot');
+                }
+            });
+        },
+
+        async executeSearch() {
+            this.loading = true;
+            this.results = [];
+
+            try {
+                const params = new URLSearchParams({
+                    query: this.query,
+                    algorithm: this.algorithm,
+                    limit: this.limit,
+                    score_threshold: this.scoreThreshold,
+                });
+
+                if (this.algorithm === 'bm25_hybrid') {
+                    params.append('fusion', this.fusion);
+                }
+
+                const selectedTypes = this.docTypes.filter(t => t !== '');
+                if (selectedTypes.length > 0) {
+                    params.append('doc_types', selectedTypes.join(','));
+                }
+
+                const response = await fetch(`/app/vector-viz/search?${params}`);
+                const data = await response.json();
+
+                if (data.success) {
+                    this.results = data.results;
+                    this.coordinates = data.coordinates_3d;
+                    this.queryCoords = data.query_coords;
+                    this.renderPlot(this.coordinates, this.queryCoords, this.results);
+                } else {
+                    alert('Search failed: ' + data.error);
+                }
+            } catch (error) {
+                alert('Error: ' + error.message);
+            } finally {
+                this.loading = false;
+            }
+        },
+
+        updatePlot() {
+            // Toggle query point visibility without recreating the plot
+            // This preserves camera position naturally since layout is untouched
+            if (this.coordinates && this.queryCoords && this.results.length > 0) {
+                const plotDiv = document.getElementById('viz-plot');
+
+                // If plot exists, just toggle the query trace visibility
+                if (plotDiv && plotDiv.data && plotDiv.data.length >= 2) {
+                    // Trace index 1 is the query point
+                    Plotly.restyle('viz-plot', { visible: this.showQueryPoint }, [1]);
+                } else {
+                    // Plot doesn't exist yet, render it
+                    this.renderPlot(this.coordinates, this.queryCoords, this.results);
+                }
+            }
+        },
+
+        renderPlot(coordinates, queryCoords, results) {
+            // Get container dimensions before creating layout
+            const container = document.getElementById('viz-plot-container');
+            const width = container.clientWidth;
+            const height = container.clientHeight;
+
+            const scores = results.map(r => r.score);
+
+            // Trace 1: Document results (always visible)
+            const documentTrace = {
+                x: coordinates.map(c => c[0]),
+                y: coordinates.map(c => c[1]),
+                z: coordinates.map(c => c[2]),
+                mode: 'markers',
+                type: 'scatter3d',
+                name: 'Documents',
+                visible: true,
+                customdata: results.map((r, i) => ({
+                    title: r.title,
+                    raw_score: r.original_score,
+                    relative_score: r.score,
+                    x: coordinates[i][0],
+                    y: coordinates[i][1],
+                    z: coordinates[i][2]
+                })),
+                hovertemplate:
+                    '<b>%{customdata.title}</b><br>' +
+                    'Raw Score: %{customdata.raw_score:.3f} (%{customdata.relative_score:.0%} relative)<br>' +
+                    '(x=%{customdata.x}, y=%{customdata.y}, z=%{customdata.z})' +
+                    '<extra></extra>',
+                marker: {
+                    size: results.map(r => 4 + (Math.pow(r.score, 2) * 10)),
+                    opacity: results.map(r => 0.3 + (r.score * 0.7)),
+                    color: scores,
+                    colorscale: 'Viridis',
+                    showscale: true,
+                    colorbar: {
+                        title: 'Relative Score',
+                        x: 1.02,
+                        xanchor: 'left',
+                        thickness: 20,
+                        len: 0.8
+                    },
+                    cmin: 0,
+                    cmax: 1
+                }
+            };
+
+            // Trace 2: Query point (visibility controlled by toggle)
+            const queryTrace = {
+                x: [queryCoords[0]],
+                y: [queryCoords[1]],
+                z: [queryCoords[2]],
+                mode: 'markers',
+                type: 'scatter3d',
+                name: 'Query',
+                visible: this.showQueryPoint,  // Initial visibility from state
+                hovertemplate:
+                    '<b>Search Query</b><br>' +
+                    `(x=${queryCoords[0]}, y=${queryCoords[1]}, z=${queryCoords[2]})` +
+                    '<extra></extra>',
+                marker: {
+                    size: 10,
+                    color: '#ef5350',  // Subdued red (Material Design Red 400)
+                    line: {
+                        color: '#c62828',  // Darker red border (Material Design Red 800)
+                        width: 1
+                    }
+                }
+            };
+
+            const layout = {
+                title: `Vector Space (PCA 3D) - ${results.length} results`,
+                width: width,   // Explicit width from container
+                height: height, // Explicit height from container
+                scene: {
+                    xaxis: { title: 'PC1' },
+                    yaxis: { title: 'PC2' },
+                    zaxis: { title: 'PC3' },
+                    camera: {
+                        eye: { x: 1.5, y: 1.5, z: 1.5 }
+                    },
+                    // Full width for 3D scene
+                    domain: {
+                        x: [0, 1],
+                        y: [0, 1]
+                    }
+                },
+                hovermode: 'closest',
+                autosize: true,  // Enable auto-sizing for window resizes
+                showlegend: false,  // Hide legend
+                margin: { l: 0, r: 100, t: 40, b: 0 }  // Right margin for colorbar
+            };
+
+            // Always render both traces - visibility is controlled by the visible property
+            const traces = [documentTrace, queryTrace];
+
+            // Enable responsive resizing
+            const config = {
+                responsive: true,
+                displayModeBar: true
+            };
+
+            // Use newPlot() with explicit dimensions - renders at correct size immediately
+            // Camera position will be preserved by subsequent Plotly.restyle() calls in updatePlot()
+            Plotly.newPlot('viz-plot', traces, layout, config);
+        },
+
+        getNextcloudUrl(result) {
+            // Use global NEXTCLOUD_BASE_URL if set, otherwise construct from window location
+            const baseUrl = window.NEXTCLOUD_BASE_URL || '';
+            switch (result.doc_type) {
+                case 'note':
+                    return `${baseUrl}/apps/notes/note/${result.id}`;
+                case 'file':
+                    return `${baseUrl}/apps/files/?fileId=${result.id}`;
+                case 'calendar':
+                    return `${baseUrl}/apps/calendar`;
+                case 'contact':
+                    return `${baseUrl}/apps/contacts`;
+                case 'deck':
+                    return `${baseUrl}/apps/deck`;
+                default:
+                    return `${baseUrl}`;
+            }
+        },
+
+        hasChunkPosition(result) {
+            return result.chunk_start_offset != null && result.chunk_end_offset != null;
+        },
+
+        isChunkExpanded(resultKey) {
+            return this.expandedChunks[resultKey] !== undefined;
+        },
+
+        async toggleChunk(result) {
+            const resultKey = `${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`;
+
+            if (this.isChunkExpanded(resultKey)) {
+                delete this.expandedChunks[resultKey];
+                return;
+            }
+
+            this.chunkLoading[resultKey] = true;
+
+            try {
+                const params = new URLSearchParams({
+                    doc_type: result.doc_type,
+                    doc_id: result.id,
+                    start: result.chunk_start_offset,
+                    end: result.chunk_end_offset,
+                    context: 500
+                });
+
+                const response = await fetch(`/app/chunk-context?${params}`);
+                const data = await response.json();
+
+                if (data.success) {
+                    this.expandedChunks[resultKey] = data;
+                } else {
+                    alert('Failed to load chunk: ' + data.error);
+                }
+            } catch (error) {
+                alert('Error loading chunk: ' + error.message);
+            } finally {
+                delete this.chunkLoading[resultKey];
+            }
+        }
+    };
+}
@@ -1,23 +1,28 @@
 """
-Refresh Token Storage for ADR-002 Tier 1: Offline Access
+Persistent Storage for MCP Server State

-Manages two separate concerns for OAuth authentication:
+This module provides SQLite-based storage for multiple concerns across both
+BasicAuth and OAuth authentication modes:

-1. **Refresh Tokens** (for background jobs ONLY)
+1. **Refresh Tokens** (OAuth mode only, for background jobs)
   - Securely stores encrypted refresh tokens for offline access
   - Used ONLY by background jobs to obtain access tokens
   - NEVER used within MCP client sessions or browser sessions

-2. **User Profile Cache** (for browser UI display ONLY)
+2. **User Profile Cache** (OAuth mode only, for browser UI display)
   - Caches IdP user profile data for browser-based admin UI
   - Queried ONCE at login, displayed from cache thereafter
   - NOT used for authorization decisions or background jobs

-IMPORTANT: These are separate concerns. Browser sessions read profile cache for
-display purposes. Background jobs use refresh tokens for API access. Never mix
-the two.
+3. **Webhook Registration Tracking** (both modes, for webhook management)
+   - Tracks registered webhook IDs mapped to presets
+   - Enables persistent webhook state across restarts
+   - Avoids redundant Nextcloud API calls for webhook status

-Tokens are encrypted at rest using Fernet symmetric encryption.
+IMPORTANT: The database is initialized in both BasicAuth and OAuth modes.
+Token storage requires TOKEN_ENCRYPTION_KEY, but webhook tracking does not.
+
+Sensitive data (tokens, secrets) is encrypted at rest using Fernet symmetric encryption.
 """

 import json
@@ -30,29 +35,40 @@ from typing import Any, Optional
 import aiosqlite
 from cryptography.fernet import Fernet

+from nextcloud_mcp_server.observability.metrics import record_db_operation
+
 logger = logging.getLogger(__name__)


 class RefreshTokenStorage:
-    """Securely store and manage user refresh tokens and profile cache.
+    """Persistent storage for MCP server state (tokens, webhooks, and future features).

-    This class manages two separate concerns:
-    - Refresh tokens: Encrypted storage for background job access (write-only by OAuth, read-only by background jobs)
-    - User profiles: Plain JSON cache for browser UI display (written at login, read by UI)
+    This class manages multiple concerns across both BasicAuth and OAuth modes:

-    These concerns are architecturally separate and should never be mixed.
+    **OAuth-specific concerns**:
+    - Refresh tokens: Encrypted storage for background job access (requires encryption key)
+    - User profiles: Plain JSON cache for browser UI display
+    - OAuth client credentials: Encrypted client secrets from DCR
+    - OAuth sessions: Temporary session state for progressive consent flow
+
+    **Both modes**:
+    - Webhook registration: Track registered webhooks mapped to presets
+    - Schema versioning: Handle database migrations automatically
+
+    Token-related operations require TOKEN_ENCRYPTION_KEY, but webhook operations do not.
    """

-    def __init__(self, db_path: str, encryption_key: bytes):
+    def __init__(self, db_path: str, encryption_key: bytes | None = None):
        """
-        Initialize refresh token storage.
+        Initialize persistent storage.

        Args:
            db_path: Path to SQLite database file
-            encryption_key: Fernet encryption key (32 bytes, base64-encoded)
+            encryption_key: Optional Fernet encryption key (32 bytes, base64-encoded).
+                          Required for token storage operations, not required for webhook tracking.
        """
        self.db_path = db_path
-        self.cipher = Fernet(encryption_key)
+        self.cipher = Fernet(encryption_key) if encryption_key else None
        self._initialized = False

    @classmethod
@@ -62,41 +78,42 @@ class RefreshTokenStorage:

        Environment variables:
            TOKEN_STORAGE_DB: Path to database file (default: /app/data/tokens.db)
-            TOKEN_ENCRYPTION_KEY: Base64-encoded Fernet key
+            TOKEN_ENCRYPTION_KEY: Optional base64-encoded Fernet key (required for token storage)

        Returns:
            RefreshTokenStorage instance

-        Raises:
-            ValueError: If TOKEN_ENCRYPTION_KEY is not set
+        Note:
+            If TOKEN_ENCRYPTION_KEY is not set, token storage operations will fail,
+            but webhook tracking will still work.
        """
        db_path = os.getenv("TOKEN_STORAGE_DB", "/app/data/tokens.db")
        encryption_key_b64 = os.getenv("TOKEN_ENCRYPTION_KEY")

-        if not encryption_key_b64:
-            raise ValueError(
-                "TOKEN_ENCRYPTION_KEY environment variable is required. "
-                "Generate one with: python -c 'from cryptography.fernet import Fernet; "
-                "print(Fernet.generate_key().decode())'"
+        encryption_key = None
+        if encryption_key_b64:
+            # Fernet expects a base64url-encoded key as bytes, not decoded bytes
+            # The key from Fernet.generate_key() is already base64url-encoded
+            try:
+                # Convert string to bytes if needed
+                if isinstance(encryption_key_b64, str):
+                    encryption_key = encryption_key_b64.encode()
+                else:
+                    encryption_key = encryption_key_b64
+
+                # Validate the key by trying to create a Fernet instance
+                Fernet(encryption_key)
+            except Exception as e:
+                raise ValueError(
+                    f"Invalid TOKEN_ENCRYPTION_KEY: {e}. "
+                    "Must be a valid Fernet key (base64url-encoded 32 bytes)."
+                ) from e
+        else:
+            logger.info(
+                "TOKEN_ENCRYPTION_KEY not set - token storage operations will be unavailable, "
+                "but webhook tracking will still work"
            )

-        # Fernet expects a base64url-encoded key as bytes, not decoded bytes
-        # The key from Fernet.generate_key() is already base64url-encoded
-        try:
-            # Convert string to bytes if needed
-            if isinstance(encryption_key_b64, str):
-                encryption_key = encryption_key_b64.encode()
-            else:
-                encryption_key = encryption_key_b64
-
-            # Validate the key by trying to create a Fernet instance
-            Fernet(encryption_key)
-        except Exception as e:
-            raise ValueError(
-                f"Invalid TOKEN_ENCRYPTION_KEY: {e}. "
-                "Must be a valid Fernet key (base64url-encoded 32 bytes)."
-            ) from e
-
        return cls(db_path=db_path, encryption_key=encryption_key)

    async def initialize(self) -> None:
@@ -204,6 +221,38 @@ class RefreshTokenStorage:
                "ON oauth_sessions(mcp_authorization_code)"
            )

+            # Schema version tracking
+            await db.execute(
+                """
+                CREATE TABLE IF NOT EXISTS schema_version (
+                    version INTEGER PRIMARY KEY,
+                    applied_at REAL NOT NULL
+                )
+                """
+            )
+
+            # Registered webhooks tracking (both BasicAuth and OAuth modes)
+            await db.execute(
+                """
+                CREATE TABLE IF NOT EXISTS registered_webhooks (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    webhook_id INTEGER NOT NULL UNIQUE,
+                    preset_id TEXT NOT NULL,
+                    created_at REAL NOT NULL
+                )
+                """
+            )
+
+            # Create indexes for efficient webhook queries
+            await db.execute(
+                "CREATE INDEX IF NOT EXISTS idx_webhooks_preset "
+                "ON registered_webhooks(preset_id)"
+            )
+            await db.execute(
+                "CREATE INDEX IF NOT EXISTS idx_webhooks_created "
+                "ON registered_webhooks(created_at)"
+            )
+
            await db.commit()

        # Set restrictive permissions after creation
@@ -245,35 +294,43 @@ class RefreshTokenStorage:
        # For Flow 2, set provisioned_at timestamp
        provisioned_at = now if flow_type == "flow2" else None

-        async with aiosqlite.connect(self.db_path) as db:
-            await db.execute(
-                """
-                INSERT OR REPLACE INTO refresh_tokens
-                (user_id, encrypted_token, expires_at, created_at, updated_at,
-                 flow_type, token_audience, provisioned_at, provisioning_client_id, scopes)
-                VALUES (?, ?, ?, COALESCE((SELECT created_at FROM refresh_tokens WHERE user_id = ?), ?), ?,
-                        ?, ?, ?, ?, ?)
-                """,
-                (
-                    user_id,
-                    encrypted_token,
-                    expires_at,
-                    user_id,
-                    now,
-                    now,
-                    flow_type,
-                    token_audience,
-                    provisioned_at,
-                    provisioning_client_id,
-                    scopes_json,
-                ),
-            )
-            await db.commit()
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                await db.execute(
+                    """
+                    INSERT OR REPLACE INTO refresh_tokens
+                    (user_id, encrypted_token, expires_at, created_at, updated_at,
+                     flow_type, token_audience, provisioned_at, provisioning_client_id, scopes)
+                    VALUES (?, ?, ?, COALESCE((SELECT created_at FROM refresh_tokens WHERE user_id = ?), ?), ?,
+                            ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        user_id,
+                        encrypted_token,
+                        expires_at,
+                        user_id,
+                        now,
+                        now,
+                        flow_type,
+                        token_audience,
+                        provisioned_at,
+                        provisioning_client_id,
+                        scopes_json,
+                    ),
+                )
+                await db.commit()
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "insert", duration, "success")

-        logger.info(
-            f"Stored refresh token for user {user_id}"
-            + (f" (expires at {expires_at})" if expires_at else "")
-        )
+            logger.info(
+                f"Stored refresh token for user {user_id}"
+                + (f" (expires at {expires_at})" if expires_at else "")
+            )
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "insert", duration, "error")
+            raise

        # Audit log
        await self._audit_log(
@@ -375,40 +432,45 @@ class RefreshTokenStorage:
        if not self._initialized:
            await self.initialize()

-        async with aiosqlite.connect(self.db_path) as db:
-            async with db.execute(
-                """
-                SELECT encrypted_token, expires_at, flow_type, token_audience,
-                       provisioned_at, provisioning_client_id, scopes
-                FROM refresh_tokens WHERE user_id = ?
-                """,
-                (user_id,),
-            ) as cursor:
-                row = await cursor.fetchone()
-
-        if not row:
-            logger.debug(f"No refresh token found for user {user_id}")
-            return None
-
-        (
-            encrypted_token,
-            expires_at,
-            flow_type,
-            token_audience,
-            provisioned_at,
-            provisioning_client_id,
-            scopes_json,
-        ) = row
-
-        # Check expiration
-        if expires_at is not None and expires_at < time.time():
-            logger.warning(
-                f"Refresh token for user {user_id} has expired (expired at {expires_at})"
-            )
-            await self.delete_refresh_token(user_id)
-            return None
-
+        start_time = time.time()
        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                async with db.execute(
+                    """
+                    SELECT encrypted_token, expires_at, flow_type, token_audience,
+                           provisioned_at, provisioning_client_id, scopes
+                    FROM refresh_tokens WHERE user_id = ?
+                    """,
+                    (user_id,),
+                ) as cursor:
+                    row = await cursor.fetchone()
+
+            if not row:
+                logger.debug(f"No refresh token found for user {user_id}")
+                duration = time.time() - start_time
+                record_db_operation("sqlite", "select", duration, "success")
+                return None
+
+            (
+                encrypted_token,
+                expires_at,
+                flow_type,
+                token_audience,
+                provisioned_at,
+                provisioning_client_id,
+                scopes_json,
+            ) = row
+
+            # Check expiration
+            if expires_at is not None and expires_at < time.time():
+                logger.warning(
+                    f"Refresh token for user {user_id} has expired (expired at {expires_at})"
+                )
+                await self.delete_refresh_token(user_id)
+                duration = time.time() - start_time
+                record_db_operation("sqlite", "select", duration, "success")
+                return None
+
            decrypted_token = self.cipher.decrypt(encrypted_token).decode()
            scopes = json.loads(scopes_json) if scopes_json else None

@@ -416,6 +478,9 @@ class RefreshTokenStorage:
                f"Retrieved refresh token for user {user_id} (flow_type: {flow_type})"
            )

+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "success")
+
            return {
                "refresh_token": decrypted_token,
                "expires_at": expires_at,
@@ -427,9 +492,89 @@ class RefreshTokenStorage:
                "scopes": scopes,
            }
        except Exception as e:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "error")
            logger.error(f"Failed to decrypt refresh token for user {user_id}: {e}")
            return None

+    async def get_refresh_token_by_provisioning_client_id(
+        self, provisioning_client_id: str
+    ) -> Optional[dict]:
+        """
+        Retrieve and decrypt refresh token by provisioning_client_id (state parameter).
+
+        This is used to check if an OAuth Flow 2 login completed successfully
+        by looking up the refresh token using the state parameter that was generated
+        during the authorization request.
+
+        Args:
+            provisioning_client_id: OAuth state parameter from the authorization request
+
+        Returns:
+            Dictionary with token data or None if not found
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        async with aiosqlite.connect(self.db_path) as db:
+            async with db.execute(
+                """
+                SELECT user_id, encrypted_token, expires_at, flow_type, token_audience,
+                       provisioned_at, provisioning_client_id, scopes
+                FROM refresh_tokens WHERE provisioning_client_id = ?
+                """,
+                (provisioning_client_id,),
+            ) as cursor:
+                row = await cursor.fetchone()
+
+        if not row:
+            logger.debug(
+                f"No refresh token found for provisioning_client_id {provisioning_client_id[:16]}..."
+            )
+            return None
+
+        (
+            user_id,
+            encrypted_token,
+            expires_at,
+            flow_type,
+            token_audience,
+            provisioned_at,
+            prov_client_id,
+            scopes_json,
+        ) = row
+
+        # Check expiration
+        if expires_at is not None and expires_at < time.time():
+            logger.warning(
+                f"Refresh token for provisioning_client_id {provisioning_client_id[:16]}... has expired"
+            )
+            return None
+
+        try:
+            decrypted_token = self.cipher.decrypt(encrypted_token).decode()
+            scopes = json.loads(scopes_json) if scopes_json else None
+
+            logger.debug(
+                f"Retrieved refresh token for provisioning_client_id {provisioning_client_id[:16]}... (user_id: {user_id})"
+            )
+
+            return {
+                "user_id": user_id,
+                "refresh_token": decrypted_token,
+                "expires_at": expires_at,
+                "flow_type": flow_type or "hybrid",
+                "token_audience": token_audience or "nextcloud",
+                "provisioned_at": provisioned_at,
+                "provisioning_client_id": prov_client_id,
+                "scopes": scopes,
+            }
+        except Exception as e:
+            logger.error(
+                f"Failed to decrypt refresh token for provisioning_client_id {provisioning_client_id[:16]}...: {e}"
+            )
+            return None
+
    async def delete_refresh_token(self, user_id: str) -> bool:
        """
        Delete refresh token for user.
@@ -443,25 +588,34 @@ class RefreshTokenStorage:
        if not self._initialized:
            await self.initialize()

-        async with aiosqlite.connect(self.db_path) as db:
-            cursor = await db.execute(
-                "DELETE FROM refresh_tokens WHERE user_id = ?",
-                (user_id,),
-            )
-            await db.commit()
-            deleted = cursor.rowcount > 0
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                cursor = await db.execute(
+                    "DELETE FROM refresh_tokens WHERE user_id = ?",
+                    (user_id,),
+                )
+                await db.commit()
+                deleted = cursor.rowcount > 0

-        if deleted:
-            logger.info(f"Deleted refresh token for user {user_id}")
-            await self._audit_log(
-                event="delete_refresh_token",
-                user_id=user_id,
-                auth_method="offline_access",
-            )
-        else:
-            logger.debug(f"No refresh token to delete for user {user_id}")
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "success")

-        return deleted
+            if deleted:
+                logger.info(f"Deleted refresh token for user {user_id}")
+                await self._audit_log(
+                    event="delete_refresh_token",
+                    user_id=user_id,
+                    auth_method="offline_access",
+                )
+            else:
+                logger.debug(f"No refresh token to delete for user {user_id}")
+
+            return deleted
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "error")
+            raise

    async def get_all_user_ids(self) -> list[str]:
        """
@@ -1026,6 +1180,123 @@ class RefreshTokenStorage:

        return deleted

+    # ============================================================================
+    # Webhook Registration Tracking (both BasicAuth and OAuth modes)
+    # ============================================================================
+
+    async def store_webhook(self, webhook_id: int, preset_id: str) -> None:
+        """
+        Store registered webhook ID for tracking.
+
+        Args:
+            webhook_id: Nextcloud webhook ID
+            preset_id: Preset identifier (e.g., "notes_sync", "calendar_sync")
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        async with aiosqlite.connect(self.db_path) as db:
+            await db.execute(
+                "INSERT OR REPLACE INTO registered_webhooks (webhook_id, preset_id, created_at) VALUES (?, ?, ?)",
+                (webhook_id, preset_id, time.time()),
+            )
+            await db.commit()
+
+        logger.debug(f"Stored webhook {webhook_id} for preset '{preset_id}'")
+
+    async def get_webhooks_by_preset(self, preset_id: str) -> list[int]:
+        """
+        Get all webhook IDs registered for a preset.
+
+        Args:
+            preset_id: Preset identifier
+
+        Returns:
+            List of webhook IDs
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                "SELECT webhook_id FROM registered_webhooks WHERE preset_id = ?",
+                (preset_id,),
+            )
+            rows = await cursor.fetchall()
+
+        return [row[0] for row in rows]
+
+    async def delete_webhook(self, webhook_id: int) -> bool:
+        """
+        Remove webhook from tracking.
+
+        Args:
+            webhook_id: Nextcloud webhook ID to remove
+
+        Returns:
+            True if webhook was deleted, False if not found
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                "DELETE FROM registered_webhooks WHERE webhook_id = ?", (webhook_id,)
+            )
+            await db.commit()
+            deleted = cursor.rowcount > 0
+
+        if deleted:
+            logger.debug(f"Deleted webhook {webhook_id} from tracking")
+
+        return deleted
+
+    async def list_all_webhooks(self) -> list[dict]:
+        """
+        List all tracked webhooks with metadata.
+
+        Returns:
+            List of webhook dictionaries with keys: webhook_id, preset_id, created_at
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                "SELECT webhook_id, preset_id, created_at FROM registered_webhooks ORDER BY created_at DESC"
+            )
+            rows = await cursor.fetchall()
+
+        return [
+            {"webhook_id": row[0], "preset_id": row[1], "created_at": row[2]}
+            for row in rows
+        ]
+
+    async def clear_preset_webhooks(self, preset_id: str) -> int:
+        """
+        Delete all webhooks for a preset (bulk operation).
+
+        Args:
+            preset_id: Preset identifier
+
+        Returns:
+            Number of webhooks deleted
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        async with aiosqlite.connect(self.db_path) as db:
+            cursor = await db.execute(
+                "DELETE FROM registered_webhooks WHERE preset_id = ?", (preset_id,)
+            )
+            await db.commit()
+            deleted = cursor.rowcount
+
+        if deleted > 0:
+            logger.debug(f"Cleared {deleted} webhook(s) for preset '{preset_id}'")
+
+        return deleted
+

 async def generate_encryption_key() -> str:
    """
@@ -1039,7 +1310,7 @@ async def generate_encryption_key() -> str:

 # Example usage
 if __name__ == "__main__":
-    import asyncio
+    import anyio

    async def main():
        # Generate a key for testing
@@ -1047,4 +1318,4 @@ if __name__ == "__main__":
        print(f"Generated encryption key: {key}")
        print(f"Set this in your environment: export TOKEN_ENCRYPTION_KEY='{key}'")

-    asyncio.run(main())
+    anyio.run(main)
@@ -0,0 +1,524 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1">
+    <meta name="apple-mobile-web-app-capable" content="yes">
+    <meta name="theme-color" content="#0082c9">
+    <title>{% block title %}Nextcloud MCP Server{% endblock %}</title>
+
+    <!-- Favicon -->
+    <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' width='32' height='32' viewBox='0 0 512 512'><rect width='512' height='512' rx='80' ry='80' fill='%230082C9'/><path d='M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z' fill='%23fff'/></svg>">
+
+    <!-- Open Sans font -->
+    <style>
+        @font-face {
+            font-family: 'Open Sans';
+            font-style: normal;
+            font-weight: normal;
+            src: local('Open Sans'), local('OpenSans');
+        }
+        @font-face {
+            font-family: 'Open Sans';
+            font-style: normal;
+            font-weight: bold;
+            src: local('Open Sans Semibold'), local('OpenSans-Semibold');
+        }
+    </style>
+
+    {% block extra_head %}{% endblock %}
+
+    <style>
+        /* Nextcloud App Design System */
+
+        /* CSS Variables */
+        :root {
+            /* Primary Colors */
+            --color-primary: #00679e;
+            --color-primary-element: #00679e;
+            --color-primary-light: #e5eff5;
+            --color-primary-element-light: #e5eff5;
+
+            /* Background Colors */
+            --color-main-background: #ffffff;
+            --color-background-dark: #ededed;
+            --color-background-hover: #f5f5f5;
+
+            /* Text Colors */
+            --color-main-text: #222222;
+            --color-text-maxcontrast: #6b6b6b;
+            --color-text-light: #767676;
+
+            /* Border Colors */
+            --color-border: #ededed;
+            --color-border-dark: #dbdbdb;
+
+            /* Borders & Radius */
+            --border-radius: 3px;
+            --border-radius-large: 10px;
+            --border-radius-pill: 100px;
+
+            /* Spacing */
+            --default-grid-baseline: 4px;
+            --default-clickable-area: 44px;
+        }
+
+        /* SVG Icon Styles */
+        .nav-icon {
+            width: 20px;
+            height: 20px;
+            display: inline-block;
+            fill: var(--color-main-text);
+            opacity: 0.7;
+        }
+
+        .app-navigation-entry.active .nav-icon {
+            fill: var(--color-primary-element);
+            opacity: 1;
+        }
+
+        /* General */
+        * {
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+            color: var(--color-main-text);
+            background: var(--color-main-background);
+            margin: 0;
+            padding: 0;
+        }
+
+        h1, h2, h3 {
+            font-weight: 300;
+            line-height: 1.2;
+        }
+
+        h1 {
+            font-size: 32px;
+            margin: 0 0 20px 0;
+            color: var(--color-main-text);
+        }
+
+        h2 {
+            font-size: 20px;
+            margin: 20px 0 12px 0;
+            color: var(--color-main-text);
+            border-bottom: 1px solid var(--color-border);
+            padding-bottom: 8px;
+        }
+
+        h3 {
+            font-size: 16px;
+            margin: 16px 0 8px 0;
+            color: var(--color-main-text);
+            font-weight: 500;
+        }
+
+        img {
+            max-width: 100%;
+        }
+
+        /* App Header (simplified, no full menu) */
+        .app-header {
+            height: 50px;
+            background: var(--color-primary-element);
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            position: sticky;
+            top: 0;
+            z-index: 100;
+            display: flex;
+            align-items: center;
+            padding: 0 20px;
+        }
+
+        .app-header__brand {
+            color: white;
+            font-size: 18px;
+            font-weight: 600;
+            text-decoration: none;
+            display: flex;
+            align-items: center;
+            gap: 12px;
+        }
+
+        .app-header__brand:hover {
+            opacity: 0.9;
+        }
+
+        .app-header__logo {
+            height: 32px;
+            width: 32px;
+            fill: white;
+        }
+
+        /* App Layout */
+        .app-content-wrapper {
+            display: flex;
+            height: calc(100vh - 50px);
+            overflow: hidden;
+        }
+
+        /* Side Navigation */
+        #app-navigation {
+            width: 250px;
+            background: var(--color-main-background);
+            border-right: 1px solid var(--color-border);
+            display: flex;
+            flex-direction: column;
+            flex-shrink: 0;
+            transition: margin-left 0.3s ease;
+        }
+
+        #app-navigation.app-navigation--closed {
+            margin-left: -250px;
+        }
+
+        .app-navigation__content {
+            flex: 1;
+            overflow-y: auto;
+            padding: 8px;
+            display: flex;
+            flex-direction: column;
+        }
+
+        .app-navigation-list {
+            list-style: none;
+            padding: 0;
+            margin: 0;
+            flex: 1;
+        }
+
+        .app-navigation-entry {
+            position: relative;
+            margin-bottom: 2px;
+        }
+
+        .app-navigation-entry__wrapper {
+            display: flex;
+            align-items: center;
+            position: relative;
+        }
+
+        .app-navigation-entry-link {
+            display: flex;
+            align-items: center;
+            padding: 0 8px;
+            min-height: var(--default-clickable-area);
+            border-radius: var(--border-radius);
+            transition: background-color 100ms ease-in-out;
+            text-decoration: none;
+            color: var(--color-main-text);
+            flex: 1;
+            font-size: 14px;
+        }
+
+        .app-navigation-entry-link:hover {
+            background-color: var(--color-background-hover);
+        }
+
+        .app-navigation-entry.active .app-navigation-entry-link {
+            background-color: var(--color-primary-element-light);
+            font-weight: 500;
+        }
+
+        .app-navigation-entry-icon {
+            width: var(--default-clickable-area);
+            height: var(--default-clickable-area);
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            margin-right: 0;
+        }
+
+        .app-navigation-entry__name {
+            flex: 1;
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+
+        .app-navigation-entry__counter {
+            margin-left: auto;
+            padding: 2px 6px;
+            border-radius: var(--border-radius-pill);
+            background-color: var(--color-background-dark);
+            font-size: 11px;
+            color: var(--color-text-maxcontrast);
+            min-width: 20px;
+            text-align: center;
+        }
+
+        .app-navigation__settings {
+            list-style: none;
+            padding: 8px 0 0 0;
+            margin: 8px 0 0 0;
+            border-top: 1px solid var(--color-border);
+            flex-shrink: 0;
+        }
+
+        .app-navigation-toggle {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            position: fixed;
+            top: 60px;
+            left: 10px;
+            z-index: 110;
+            background: var(--color-main-background);
+            border: 1px solid var(--color-border);
+            border-radius: var(--border-radius);
+            padding: 8px 12px;
+            cursor: pointer;
+            box-shadow: 0 0 5px rgba(0,0,0,0.1);
+            transition: left 0.3s ease;
+        }
+
+        .app-navigation-toggle:hover {
+            background: var(--color-background-hover);
+        }
+
+        #app-navigation:not(.app-navigation--closed) ~ * .app-navigation-toggle {
+            left: 260px;
+        }
+
+        /* Main Content Area */
+        #app-content {
+            flex: 1;
+            overflow-y: auto;
+            background: var(--color-main-background);
+        }
+
+        .page-content {
+            max-width: 1000px;
+            margin: 0 auto;
+            padding: 24px;
+        }
+
+        .content-section {
+            background: var(--color-main-background);
+            border-radius: 0;
+            padding: 0;
+            box-shadow: none;
+        }
+
+        .content-section h1 {
+            font-size: 24px;
+            font-weight: 600;
+            margin-bottom: 24px;
+        }
+
+        .content-section h2 {
+            font-size: 18px;
+            font-weight: 500;
+            margin: 24px 0 12px 0;
+            border-bottom: none;
+            padding-bottom: 0;
+        }
+
+        .content-section h3 {
+            font-size: 16px;
+            font-weight: 500;
+        }
+
+        /* Responsive */
+        @media (max-width: 768px) {
+            #app-navigation {
+                position: fixed;
+                height: calc(100vh - 50px);
+                z-index: 105;
+                box-shadow: 2px 0 8px rgba(0,0,0,0.1);
+            }
+
+            .page-content {
+                padding: 16px;
+            }
+        }
+
+        /* Footer */
+        footer.page-footer {
+            background-color: #0F0833;
+            color: #ffffff;
+            padding: 40px 0;
+            margin-top: 60px;
+        }
+
+        footer.page-footer .bootstrap-container {
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 0 20px;
+        }
+
+        footer.page-footer h1 {
+            font-size: 15px;
+            font-weight: bold;
+            line-height: 1.8;
+            color: #ffffff;
+            margin-top: 20px;
+        }
+
+        footer.page-footer ul {
+            list-style-type: none;
+            padding-left: 0;
+        }
+
+        footer.page-footer li {
+            font-size: 13px;
+            line-height: 1.8;
+            color: #ffffff;
+            margin-top: 0;
+        }
+
+        footer.page-footer li a {
+            color: #ffffff;
+            text-decoration: none;
+            display: block;
+            padding: 4px 0;
+        }
+
+        footer.page-footer li a:hover {
+            text-decoration: underline;
+        }
+
+        footer.page-footer p {
+            font-size: 15px;
+            line-height: 1.8;
+            color: #ffffff;
+        }
+
+        footer.page-footer p.copyright {
+            color: rgba(255, 255, 255, 0.5);
+            font-size: 13px;
+            text-align: center;
+            margin-top: 30px;
+        }
+
+        /* Buttons */
+        .btn {
+            border-radius: 50px;
+            padding: 10px 20px;
+            text-decoration: none;
+            display: inline-block;
+            cursor: pointer;
+            border: none;
+            font-size: 14px;
+            transition: all 0.3s;
+        }
+
+        .btn-primary {
+            background: #0082C9;
+            border: 1px solid #0062C9;
+            color: #fff;
+        }
+
+        .btn-primary:hover {
+            background: #006ba3;
+        }
+
+        /* Tables */
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            margin: 20px 0;
+        }
+
+        td {
+            padding: 12px 8px;
+            border-bottom: 1px solid var(--color-border);
+            font-size: 14px;
+        }
+
+        td:first-child {
+            width: 180px;
+            color: var(--color-text-maxcontrast);
+            font-weight: 500;
+        }
+
+        code {
+            background-color: var(--color-background-dark);
+            padding: 2px 6px;
+            border-radius: var(--border-radius);
+            font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace;
+            font-size: 90%;
+            color: var(--color-main-text);
+        }
+
+        /* Badges */
+        .badge {
+            display: inline-block;
+            padding: 3px 8px;
+            border-radius: 12px;
+            font-size: 12px;
+            font-weight: bold;
+            text-transform: uppercase;
+        }
+
+        .badge-oauth {
+            background-color: #4caf50;
+            color: white;
+        }
+
+        .badge-basic {
+            background-color: #2196f3;
+            color: white;
+        }
+
+        /* Messages */
+        .warning {
+            background-color: #fff3cd;
+            border-left: 4px solid #ffc107;
+            padding: 15px;
+            margin: 15px 0;
+            color: #856404;
+        }
+
+        .info-message {
+            background-color: #e3f2fd;
+            border-left: 4px solid #2196f3;
+            padding: 15px;
+            margin: 15px 0;
+            color: #1565c0;
+        }
+
+        .error {
+            background-color: #ffebee;
+            border-left: 4px solid #d32f2f;
+            padding: 15px;
+            margin: 15px 0;
+            color: #c62828;
+        }
+
+        .success {
+            background-color: #e8f5e9;
+            border: 2px solid #4caf50;
+            padding: 30px;
+            border-radius: 8px;
+            text-align: center;
+        }
+
+        .success h1 {
+            color: #4caf50;
+        }
+
+        {% block extra_styles %}{% endblock %}
+    </style>
+</head>
+<body>
+    <!-- App Header -->
+    <header class="app-header">
+        <a href="/app" class="app-header__brand">
+            <svg class="app-header__logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512">
+                <path d="M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z" fill="#fff"/>
+            </svg>
+            <span>Nextcloud MCP Server</span>
+        </a>
+    </header>
+
+    <!-- App Content Wrapper (Sidebar + Main Content) -->
+    {% block content %}{% endblock %}
+
+    {% block scripts %}{% endblock %}
+</body>
+</html>
@@ -0,0 +1,19 @@
+{% extends "base.html" %}
+
+{% block title %}{{ error_title|default('Error') }} - Nextcloud MCP Server{% endblock %}
+
+{% block content %}
+<h1>{{ error_title|default('Error') }}</h1>
+
+<div class="error">
+    <strong>Error:</strong> {{ error_message }}
+</div>
+
+{% if login_url %}
+<p><a href="{{ login_url }}" class="btn btn-primary">Login again</a></p>
+{% endif %}
+
+{% if back_url %}
+<p><a href="{{ back_url }}" class="btn">Go Back</a></p>
+{% endif %}
+{% endblock %}
@@ -0,0 +1,21 @@
+{% extends "base.html" %}
+
+{% block title %}{{ success_title|default('Success') }} - Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+{% if redirect_url and redirect_delay %}
+<meta http-equiv="refresh" content="{{ redirect_delay }};url={{ redirect_url }}">
+{% endif %}
+{% endblock %}
+
+{% block content %}
+<div class="success">
+    <h1>{{ success_title|default('✓ Success') }}</h1>
+    {% for message in success_messages %}
+    <p>{{ message }}</p>
+    {% endfor %}
+    {% if redirect_url %}
+    <p>Redirecting...</p>
+    {% endif %}
+</div>
+{% endblock %}
@@ -0,0 +1,650 @@
+{% extends "base.html" %}
+
+{% block title %}Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+    <!-- htmx for dynamic loading -->
+    <script src="https://unpkg.com/htmx.org@1.9.10"></script>
+
+    <!-- Alpine.js for state management -->
+    <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
+
+    <!-- Plotly.js for vector visualization -->
+    <script src="https://cdn.plot.ly/plotly-3.3.0.min.js"></script>
+
+    <!-- Vector Viz static assets -->
+    <link rel="stylesheet" href="/app/static/vector-viz.css">
+{% endblock %}
+
+{% block extra_styles %}
+    /* Smooth htmx transitions */
+    .htmx-swapping {
+        opacity: 0;
+        transition: opacity 200ms ease-out;
+    }
+
+    .htmx-settling {
+        opacity: 1;
+        transition: opacity 200ms ease-in;
+    }
+
+    /* Logout button styling */
+    .logout-section {
+        margin-top: 20px;
+        padding-top: 20px;
+        border-top: 1px solid var(--color-border);
+    }
+
+    /* Welcome tab specific styles */
+    .hero-section {
+        background: linear-gradient(135deg, var(--color-primary-element) 0%, #0082c9 100%);
+        color: white;
+        padding: 60px 24px;
+        margin: -24px -24px 40px -24px;
+        border-radius: 0 0 var(--border-radius-large) var(--border-radius-large);
+        text-align: center;
+    }
+
+    .hero-section h1 {
+        color: white;
+        font-size: 36px;
+        margin: 0 0 16px 0;
+        font-weight: 600;
+    }
+
+    .hero-section p {
+        font-size: 18px;
+        opacity: 0.95;
+        max-width: 700px;
+        margin: 0 auto;
+        line-height: 1.6;
+    }
+
+    .feature-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+        gap: 24px;
+        margin: 32px 0;
+    }
+
+    .feature-card {
+        background: var(--color-main-background);
+        border: 2px solid var(--color-border);
+        border-radius: var(--border-radius-large);
+        padding: 24px;
+        transition: all 0.2s;
+        cursor: pointer;
+        text-decoration: none;
+        color: inherit;
+        display: block;
+    }
+
+    .feature-card:hover {
+        border-color: var(--color-primary-element);
+        box-shadow: 0 4px 12px rgba(0, 103, 158, 0.15);
+        transform: translateY(-2px);
+    }
+
+    .feature-card h3 {
+        color: var(--color-primary-element);
+        font-size: 20px;
+        margin: 12px 0 8px 0;
+        font-weight: 600;
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .feature-card p {
+        color: var(--color-text-maxcontrast);
+        font-size: 14px;
+        line-height: 1.6;
+        margin: 8px 0 0 0;
+    }
+
+    .feature-icon {
+        width: 48px;
+        height: 48px;
+        background: var(--color-primary-element-light);
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        margin-bottom: 8px;
+    }
+
+    .feature-icon svg {
+        width: 28px;
+        height: 28px;
+        fill: var(--color-primary-element);
+    }
+
+    .info-section {
+        background: var(--color-background-hover);
+        border-radius: var(--border-radius-large);
+        padding: 32px;
+        margin: 32px 0;
+    }
+
+    .info-section h2 {
+        color: var(--color-main-text);
+        font-size: 24px;
+        margin: 0 0 16px 0;
+        border: none;
+        padding: 0;
+    }
+
+    .info-section p {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 12px 0;
+    }
+
+    .info-section ul {
+        margin: 12px 0;
+        padding-left: 24px;
+    }
+
+    .info-section li {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 8px 0;
+    }
+
+    .info-section code {
+        background: var(--color-main-background);
+        padding: 2px 8px;
+        border-radius: var(--border-radius);
+        font-size: 13px;
+    }
+
+    .auth-status {
+        background: var(--color-primary-element-light);
+        border-left: 4px solid var(--color-primary-element);
+        padding: 16px 20px;
+        margin: 24px 0;
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .auth-status svg {
+        width: 24px;
+        height: 24px;
+        fill: var(--color-primary-element);
+        flex-shrink: 0;
+    }
+
+    .auth-status-text {
+        flex: 1;
+    }
+
+    .auth-status-text strong {
+        display: block;
+        color: var(--color-main-text);
+        font-size: 14px;
+        margin-bottom: 4px;
+    }
+
+    .auth-status-text span {
+        color: var(--color-text-maxcontrast);
+        font-size: 13px;
+    }
+{% endblock %}
+
+{% block content %}
+<div class="app-content-wrapper" x-data="{ activeSection: 'welcome', navOpen: true }">
+    <!-- Side Navigation -->
+    <nav id="app-navigation" :class="{ 'app-navigation--closed': !navOpen }">
+        <div class="app-navigation__content">
+            <!-- Navigation List -->
+            <ul class="app-navigation-list">
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'welcome' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'welcome'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M10,20V14H14V20H19V12H22L12,3L2,12H5V20H10Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Welcome</span>
+                        </a>
+                    </div>
+                </li>
+
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'user-info' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'user-info'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">User Info</span>
+                        </a>
+                    </div>
+                </li>
+
+                {% if show_vector_sync_tab %}
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'vector-sync' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'vector-sync'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Vector Sync</span>
+                        </a>
+                    </div>
+                </li>
+
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'vector-viz' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'vector-viz'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Vector Viz</span>
+                        </a>
+                    </div>
+                </li>
+                {% endif %}
+
+                {% if show_webhooks_tab %}
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'webhooks' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'webhooks'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M10.59,13.41C11,13.8 11,14.44 10.59,14.83C10.2,15.22 9.56,15.22 9.17,14.83C7.22,12.88 7.22,9.71 9.17,7.76V7.76L12.71,4.22C14.66,2.27 17.83,2.27 19.78,4.22C21.73,6.17 21.73,9.34 19.78,11.29L18.29,12.78C18.3,11.96 18.17,11.14 17.89,10.36L18.36,9.88C19.54,8.71 19.54,6.81 18.36,5.64C17.19,4.46 15.29,4.46 14.12,5.64L10.59,9.17C9.41,10.34 9.41,12.24 10.59,13.41M13.41,9.17C13.8,8.78 14.44,8.78 14.83,9.17C16.78,11.12 16.78,14.29 14.83,16.24V16.24L11.29,19.78C9.34,21.73 6.17,21.73 4.22,19.78C2.27,17.83 2.27,14.66 4.22,12.71L5.71,11.22C5.7,12.04 5.83,12.86 6.11,13.65L5.64,14.12C4.46,15.29 4.46,17.19 5.64,18.36C6.81,19.54 8.71,19.54 9.88,18.36L13.41,14.83C14.59,13.66 14.59,11.76 13.41,10.59C13,10.2 13,9.56 13.41,9.17Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Webhooks</span>
+                        </a>
+                    </div>
+                </li>
+                {% endif %}
+            </ul>
+
+            <!-- Settings/Logout at bottom -->
+            {% if logout_url %}
+            <ul class="app-navigation__settings">
+                <li class="app-navigation-entry">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="{{ logout_url }}" class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M16,17V14H9V10H16V7L21,12L16,17M14,2A2,2 0 0,1 16,4V6H14V4H5V20H14V18H16V20A2,2 0 0,1 14,22H5A2,2 0 0,1 3,20V4A2,2 0 0,1 5,2H14Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Logout</span>
+                        </a>
+                    </div>
+                </li>
+            </ul>
+            {% endif %}
+        </div>
+
+        <!-- Toggle Button (mobile) -->
+        <button @click="navOpen = !navOpen"
+                class="app-navigation-toggle"
+                :aria-expanded="navOpen.toString()">
+            ☰
+        </button>
+    </nav>
+
+    <!-- Main Content Area -->
+    <main id="app-content">
+        <div class="page-content">
+            <!-- Welcome Section -->
+            <div x-show="activeSection === 'welcome'">
+                <!-- Hero Section -->
+                <div class="hero-section">
+                    <h1>Welcome to Nextcloud MCP Server</h1>
+                    <p>
+                        Interactive user interface for semantic search and document retrieval.
+                        Test queries, visualize results, and explore your Nextcloud content using RAG workflows.
+                    </p>
+                </div>
+
+                <!-- Authentication Status -->
+                <div class="auth-status">
+                    <svg viewBox="0 0 24 24">
+                        <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                    </svg>
+                    <div class="auth-status-text">
+                        <strong>Authenticated as: {{ username }}</strong>
+                        <span>Authentication mode: <code>{{ auth_mode }}</code></span>
+                    </div>
+                </div>
+
+                {% if vector_sync_enabled %}
+                <!-- Vector Sync Enabled Content -->
+                <div class="info-section">
+                    <h2>About Semantic Search</h2>
+                    <p>
+                        This interface provides access to <strong>semantic search</strong> capabilities powered by vector embeddings.
+                        Unlike traditional keyword search, semantic search understands the <em>meaning</em> of your queries and finds
+                        conceptually similar content across your Nextcloud apps.
+                    </p>
+                    <p>
+                        <strong>How it works:</strong>
+                    </p>
+                    <ul>
+                        <li>Documents from Notes, Calendar, Files, Contacts, and Deck are indexed into a vector database</li>
+                        <li>Each document chunk is converted to a 768-dimensional vector embedding that captures semantic meaning</li>
+                        <li>Queries are also converted to embeddings and matched against document vectors using similarity search</li>
+                        <li>Results can be retrieved using pure semantic search or hybrid BM25 search combining keywords and semantics</li>
+                    </ul>
+                </div>
+
+                <div class="info-section">
+                    <h2>RAG Workflow Integration</h2>
+                    <p>
+                        This UI allows you to <strong>test the same queries that Large Language Models (LLMs) would use</strong> in a
+                        Retrieval-Augmented Generation (RAG) workflow. When an AI assistant needs to answer questions about your data:
+                    </p>
+                    <ul>
+                        <li><strong>Step 1:</strong> The assistant converts your question into a search query</li>
+                        <li><strong>Step 2:</strong> The MCP server retrieves relevant document chunks using semantic search</li>
+                        <li><strong>Step 3:</strong> Retrieved context is passed to the LLM to generate an informed answer</li>
+                    </ul>
+
+                    <!-- RAG Workflow Diagram -->
+                    <div style="background: var(--color-main-background); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 24px; margin: 24px 0; overflow-x: auto;">
+                        <div style="text-align: center; font-weight: 600; margin-bottom: 20px; color: var(--color-primary-element); font-size: 16px;">
+                            MCP Sampling RAG Workflow
+                        </div>
+
+                        <!-- Four-component bidirectional flow -->
+                        <div style="max-width: 1000px; margin: 0 auto;">
+                            <div style="display: grid; grid-template-columns: 0.7fr auto 1fr auto 1fr auto 0.9fr; gap: 10px; align-items: center;">
+                                <!-- User -->
+                                <div style="background: var(--color-background-hover); border: 2px solid var(--color-border); border-radius: var(--border-radius-large); padding: 14px; text-align: center;">
+                                    <div style="font-size: 26px; margin-bottom: 5px;">👤</div>
+                                    <div style="font-weight: 600; color: var(--color-main-text); font-size: 12px;">User</div>
+                                    <div style="font-size: 9px; color: var(--color-text-maxcontrast); font-style: italic; margin-top: 5px; line-height: 1.2;">
+                                        "What are health<br>benefits of coffee?"
+                                    </div>
+                                </div>
+
+                                <!-- Arrow User <-> Client -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-text-maxcontrast);">↔</div>
+                                </div>
+
+                                <!-- MCP Client + LLM (combined) -->
+                                <div style="background: var(--color-primary-element-light); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 12px; text-align: center;">
+                                    <div style="font-weight: 600; color: var(--color-primary-element); font-size: 13px; margin-bottom: 8px;">MCP Client + LLM</div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 8px; margin-bottom: 6px;">
+                                        <div style="font-size: 9px; color: var(--color-text-maxcontrast);">(Claude Code)</div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 8px; border: 2px solid var(--color-primary-element);">
+                                        <div style="font-size: 16px; margin-bottom: 2px;">🧠</div>
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 10px;">Client's LLM</div>
+                                        <div style="font-size: 8px; color: var(--color-text-maxcontrast);">(Claude)</div>
+                                    </div>
+
+                                    <div style="margin-top: 8px; font-size: 8px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                        <strong>Enables RAG:</strong><br>
+                                        Receives context,<br>
+                                        generates answer
+                                    </div>
+                                </div>
+
+                                <!-- Arrow Client <-> Server -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-primary-element);">↔</div>
+                                    <div style="font-size: 7px; color: var(--color-text-maxcontrast); margin-top: 2px; font-weight: 600; line-height: 1.1;">
+                                        Query +<br>
+                                        Sampling
+                                    </div>
+                                </div>
+
+                                <!-- MCP Server -->
+                                <div style="background: var(--color-primary-element-light); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 12px; text-align: center;">
+                                    <div style="font-weight: 600; color: var(--color-primary-element); font-size: 13px; margin-bottom: 8px;">MCP Server</div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">1. Semantic Search</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Vector embeddings<br>
+                                            BM25 Hybrid + RRF
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">2. Retrieve Context</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Top relevant docs<br>
+                                            with scores
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">3. Format Response</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Document chunks<br>
+                                            with citations
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">4. Send to LLM</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Via MCP sampling<br>
+                                            for answer generation
+                                        </div>
+                                    </div>
+                                </div>
+
+                                <!-- Arrow Server <-> Nextcloud -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-primary-element);">↔</div>
+                                    <div style="font-size: 7px; color: var(--color-text-maxcontrast); margin-top: 2px; font-weight: 600; line-height: 1.1;">
+                                        Retrieve
+                                    </div>
+                                </div>
+
+                                <!-- Nextcloud -->
+                                <div style="background: var(--color-background-hover); border: 2px solid var(--color-border); border-radius: var(--border-radius-large); padding: 12px; text-align: center; position: relative;">
+                                    <img src="/app/static/nextcloud-logo.png" alt="Nextcloud" style="width: 40px; height: 40px; margin-bottom: 6px;" />
+                                    <div style="font-weight: 600; color: var(--color-main-text); font-size: 12px; margin-bottom: 4px;">Nextcloud</div>
+                                    <div style="font-size: 8px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                        Notes, Calendar,<br>
+                                        Files, Contacts,<br>
+                                        Deck
+                                    </div>
+                                </div>
+                            </div>
+
+                            <!-- Explanation below diagram -->
+                            <div style="margin-top: 24px; padding: 16px; background: var(--color-background-hover); border-radius: var(--border-radius); border-left: 4px solid var(--color-primary-element);">
+                                <div style="font-size: 12px; color: var(--color-main-text); line-height: 1.6;">
+                                    <strong>How RAG works via MCP Sampling:</strong>
+                                </div>
+                                <ol style="margin: 8px 0 0 0; padding-left: 20px; font-size: 11px; color: var(--color-text-maxcontrast); line-height: 1.6;">
+                                    <li>User asks question through MCP Client</li>
+                                    <li>Client sends query to MCP Server</li>
+                                    <li>Server retrieves relevant document context from Nextcloud</li>
+                                    <li><strong>Server sends context back to Client's LLM</strong> (MCP Sampling)</li>
+                                    <li>Client's LLM generates answer with citations using retrieved context</li>
+                                    <li>Answer returned to user</li>
+                                </ol>
+                                <div style="margin-top: 8px; font-size: 10px; color: var(--color-text-maxcontrast); font-style: italic;">
+                                    The server has no LLM - it only retrieves context. The client's existing LLM is reused for answer generation.
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+
+                    <p style="margin-top: 16px;">
+                        <strong>Key Point:</strong> The MCP server retrieves context but doesn't generate answers itself.
+                        Through <strong>MCP sampling</strong>, it requests the client's LLM to generate responses, giving users
+                        full control over which model is used and ensuring all processing happens client-side.
+                    </p>
+
+                    <p>
+                        By using this interface, you can preview search results, understand relevance scores, and verify
+                        that the system retrieves the right information before it reaches the LLM.
+                    </p>
+                </div>
+
+                <!-- Feature Cards -->
+                <h2>Available Features</h2>
+                <div class="feature-grid">
+                    <a href="#" @click.prevent="activeSection = 'user-info'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                            </svg>
+                        </div>
+                        <h3>User Information</h3>
+                        <p>
+                            View your authentication details, session information, and IdP profile.
+                            Manage background access permissions.
+                        </p>
+                    </a>
+
+                    <a href="#" @click.prevent="activeSection = 'vector-sync'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                            </svg>
+                        </div>
+                        <h3>Vector Sync Status</h3>
+                        <p>
+                            Monitor real-time indexing progress with metrics for indexed documents, pending queue,
+                            and synchronization status.
+                        </p>
+                    </a>
+
+                    <a href="#" @click.prevent="activeSection = 'vector-viz'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                            </svg>
+                        </div>
+                        <h3>Vector Visualization</h3>
+                        <p>
+                            Interactive search interface with 2D PCA visualization. Compare algorithms,
+                            view relevance scores, and explore matched document chunks.
+                        </p>
+                    </a>
+                </div>
+
+                {% else %}
+                <!-- Vector Sync Disabled Content -->
+                <div class="warning">
+                    <h3 style="margin-top: 0;">Vector Sync is Disabled</h3>
+                    <p>
+                        Semantic search and vector visualization features are currently disabled.
+                        To enable these features, set <code>VECTOR_SYNC_ENABLED=true</code> in your environment configuration.
+                    </p>
+                    <p style="margin-bottom: 0;">
+                        <strong>Learn more:</strong>
+                        <a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank" style="color: inherit; text-decoration: underline;">
+                            Configuration Guide
+                        </a>
+                    </p>
+                </div>
+
+                <!-- Limited Feature Card -->
+                <h2>Available Features</h2>
+                <div class="feature-grid">
+                    <a href="#" @click.prevent="activeSection = 'user-info'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                            </svg>
+                        </div>
+                        <h3>User Information</h3>
+                        <p>
+                            View your authentication details, session information, and IdP profile.
+                            Manage background access permissions.
+                        </p>
+                    </a>
+                </div>
+                {% endif %}
+
+                <!-- Documentation Section -->
+                <div class="info-section" style="margin-top: 40px;">
+                    <h2>Documentation</h2>
+                    <p>
+                        For detailed information about configuration, authentication modes, and advanced features,
+                        please refer to the project documentation:
+                    </p>
+                    <ul>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/installation.md" target="_blank">Installation Guide</a></li>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank">Configuration Options</a></li>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/authentication.md" target="_blank">Authentication Modes</a></li>
+                        {% if vector_sync_enabled %}
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/user-guide/vector-sync-ui.md" target="_blank">Vector Sync UI Guide</a></li>
+                        {% endif %}
+                    </ul>
+                </div>
+            </div>
+
+            <!-- User Info Section -->
+            <div x-show="activeSection === 'user-info'">
+                <div class="content-section">
+                    <h1>User Information</h1>
+                    {{ user_info_tab_html|safe }}
+                </div>
+            </div>
+
+            {% if show_vector_sync_tab %}
+            <!-- Vector Sync Section -->
+            <div x-show="activeSection === 'vector-sync'">
+                <div class="content-section">
+                    <h1>Vector Sync Status</h1>
+                    {{ vector_sync_tab_html|safe }}
+                </div>
+            </div>
+
+            <!-- Vector Viz Section -->
+            <div x-show="activeSection === 'vector-viz'">
+                <div class="content-section">
+                    <h1>Vector Visualization</h1>
+                    <div hx-get="/app/vector-viz" hx-trigger="load" hx-swap="outerHTML">
+                        <p style="color: #999;">Loading vector visualization...</p>
+                    </div>
+                </div>
+            </div>
+            {% endif %}
+
+            {% if show_webhooks_tab %}
+            <!-- Webhooks Section -->
+            <div x-show="activeSection === 'webhooks'">
+                <div class="content-section">
+                    <h1>Webhook Management</h1>
+                    {{ webhooks_tab_html|safe }}
+                </div>
+            </div>
+            {% endif %}
+        </div>
+    </main>
+</div>
+
+<script>
+    // Set global Nextcloud base URL for use in external JS
+    window.NEXTCLOUD_BASE_URL = '{{ nextcloud_host_for_links }}';
+</script>
+<script src="/app/static/vector-viz.js"></script>
+{% endblock %}
@@ -0,0 +1,180 @@
+<div x-data="vizApp()">
+    <div class="viz-layout">
+        <!-- Top: Search Controls -->
+        <div class="viz-card viz-controls-card">
+            <form @submit.prevent="executeSearch">
+                <div class="viz-controls-grid">
+                    <div class="viz-control-group">
+                        <label>Search Query</label>
+                        <input type="text" x-model="query" placeholder="Enter search query..." required />
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>Algorithm</label>
+                        <select x-model="algorithm">
+                            <option value="semantic">Semantic (Dense)</option>
+                            <option value="bm25_hybrid" selected>BM25 Hybrid</option>
+                        </select>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>Fusion</label>
+                        <select x-model="fusion" :disabled="algorithm !== 'bm25_hybrid'" :style="algorithm !== 'bm25_hybrid' ? 'opacity: 0.5; cursor: not-allowed;' : ''">
+                            <option value="rrf" selected>RRF</option>
+                            <option value="dbsf">DBSF</option>
+                        </select>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>&nbsp;</label>
+                        <button type="submit" class="viz-btn">Search</button>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>&nbsp;</label>
+                        <button type="button" class="viz-btn-secondary" @click="showAdvanced = !showAdvanced">
+                            <span x-text="showAdvanced ? 'Hide' : 'Advanced'"></span>
+                        </button>
+                    </div>
+                </div>
+
+                <!-- Advanced Options (Collapsible) -->
+                <div x-show="showAdvanced" style="margin-top: 16px;">
+                    <div class="viz-controls-grid" style="grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));">
+                        <div class="viz-control-group">
+                            <label>Document Types</label>
+                            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 8px; font-size: 13px;">
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="" style="margin-right: 4px;">
+                                    <span>All</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="note" style="margin-right: 4px;">
+                                    <span>Notes</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="file" style="margin-right: 4px;">
+                                    <span>Files</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="calendar" style="margin-right: 4px;">
+                                    <span>Calendar</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="contact" style="margin-right: 4px;">
+                                    <span>Contacts</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="deck" style="margin-right: 4px;">
+                                    <span>Deck</span>
+                                </label>
+                            </div>
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label>Score Threshold</label>
+                            <input type="number" x-model.number="scoreThreshold" min="0" max="1" step="any" />
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label>Result Limit</label>
+                            <input type="number" x-model.number="limit" min="1" max="1000" />
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label>Display Options</label>
+                            <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal; margin-top: 4px;">
+                                <input type="checkbox" x-model="showQueryPoint" @change="updatePlot()" style="margin-right: 6px;">
+                                <span>Show Query Point</span>
+                            </label>
+                        </div>
+                    </div>
+                </div>
+            </form>
+        </div>
+
+        <!-- Plot -->
+        <div class="viz-card viz-card-plot">
+            <div id="viz-plot-container">
+                <div x-show="loading" class="viz-loading-overlay" x-transition.opacity.duration.200ms>
+                    Executing search and computing PCA projection...
+                </div>
+                <div id="viz-plot" x-show="!loading" x-transition.opacity.duration.200ms></div>
+            </div>
+        </div>
+
+        <!-- Results -->
+        <div class="viz-card" style="flex: 0 0 auto;">
+            <h3 style="margin-top: 0;">Search Results (<span x-text="loading ? '...' : results.length"></span>)</h3>
+
+        <div x-show="loading" class="viz-loading" x-transition.opacity.duration.200ms>
+            Loading results...
+        </div>
+
+        <div x-show="!loading && results.length === 0" class="viz-no-results" x-transition.opacity.duration.200ms>
+            No results found. Try a different query or adjust your search parameters.
+        </div>
+
+        <template x-if="!loading && results.length > 0">
+            <div x-transition.opacity.duration.200ms>
+                <template x-for="result in results" :key="`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`">
+                    <div style="padding: 12px; border-bottom: 1px solid #eee;">
+                        <a :href="getNextcloudUrl(result)" target="_blank" style="font-weight: 500; color: #0066cc; text-decoration: none;">
+                            <span x-text="result.title"></span>
+                        </a>
+                        <div style="font-size: 14px; color: #666; margin-top: 4px;"
+                             x-text="result.excerpt.length > 200 ? result.excerpt.substring(0, 200) + '...' : result.excerpt"></div>
+                        <div style="font-size: 12px; color: #999; margin-top: 4px;">
+                            Raw Score: <span x-text="result.original_score.toFixed(3)"></span>
+                            (<span x-text="(result.score * 100).toFixed(0)"></span>% relative) |
+                            Type: <span x-text="result.doc_type"></span>
+                        </div>
+
+                        <!-- Show Chunk button (only if chunk position is available) -->
+                        <template x-if="hasChunkPosition(result)">
+                            <button
+                                class="chunk-toggle-btn"
+                                @click="toggleChunk(result)"
+                                x-text="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`) ? 'Hide Chunk' : 'Show Chunk'"
+                            ></button>
+                        </template>
+
+                        <!-- Chunk context (expanded inline) -->
+                        <template x-if="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`)">
+                            <div class="chunk-context" x-transition.opacity.duration.200ms>
+                                <template x-if="chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
+                                    <div style="color: #666; font-style: italic;">Loading chunk...</div>
+                                </template>
+                                <template x-if="!chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
+                                    <div>
+                                        <!-- Highlighted page image for PDFs -->
+                                        <template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image">
+                                            <div class="chunk-image-container">
+                                                <div class="chunk-image-header">
+                                                    <span>Page <span x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"></span></span>
+                                                </div>
+                                                <img
+                                                    :src="'data:image/png;base64,' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image"
+                                                    :alt="'Page ' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"
+                                                    class="chunk-highlighted-image"
+                                                />
+                                            </div>
+                                        </template>
+                                        <!-- Text context -->
+                                        <template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_before">
+                                            <span class="chunk-ellipsis">...</span>
+                                        </template>
+                                        <span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.before_context"></span><span class="chunk-matched" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.chunk_text"></span><span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.after_context"></span><template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_after">
+                                            <span class="chunk-ellipsis">...</span>
+                                        </template>
+                                    </div>
+                                </template>
+                            </div>
+                        </template>
+                    </div>
+                </template>
+            </div>
+        </template>
+        </div><!-- Search Results -->
+    </div><!-- .viz-layout -->
+</div><!-- x-data="vizApp()" -->
@@ -0,0 +1,392 @@
+{% extends "base.html" %}
+
+{% block title %}Welcome - Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+    <!-- Alpine.js for interactive elements -->
+    <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
+{% endblock %}
+
+{% block extra_styles %}
+    /* Welcome page specific styles */
+    .hero-section {
+        background: linear-gradient(135deg, var(--color-primary-element) 0%, #0082c9 100%);
+        color: white;
+        padding: 60px 24px;
+        margin: -24px -24px 40px -24px;
+        border-radius: 0 0 var(--border-radius-large) var(--border-radius-large);
+        text-align: center;
+    }
+
+    .hero-section h1 {
+        color: white;
+        font-size: 36px;
+        margin: 0 0 16px 0;
+        font-weight: 600;
+    }
+
+    .hero-section p {
+        font-size: 18px;
+        opacity: 0.95;
+        max-width: 700px;
+        margin: 0 auto;
+        line-height: 1.6;
+    }
+
+    .feature-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+        gap: 24px;
+        margin: 32px 0;
+    }
+
+    .feature-card {
+        background: var(--color-main-background);
+        border: 2px solid var(--color-border);
+        border-radius: var(--border-radius-large);
+        padding: 24px;
+        transition: all 0.2s;
+        cursor: pointer;
+        text-decoration: none;
+        color: inherit;
+        display: block;
+    }
+
+    .feature-card:hover {
+        border-color: var(--color-primary-element);
+        box-shadow: 0 4px 12px rgba(0, 103, 158, 0.15);
+        transform: translateY(-2px);
+    }
+
+    .feature-card h3 {
+        color: var(--color-primary-element);
+        font-size: 20px;
+        margin: 12px 0 8px 0;
+        font-weight: 600;
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .feature-card p {
+        color: var(--color-text-maxcontrast);
+        font-size: 14px;
+        line-height: 1.6;
+        margin: 8px 0 0 0;
+    }
+
+    .feature-icon {
+        width: 48px;
+        height: 48px;
+        background: var(--color-primary-element-light);
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        margin-bottom: 8px;
+    }
+
+    .feature-icon svg {
+        width: 28px;
+        height: 28px;
+        fill: var(--color-primary-element);
+    }
+
+    .info-section {
+        background: var(--color-background-hover);
+        border-radius: var(--border-radius-large);
+        padding: 32px;
+        margin: 32px 0;
+    }
+
+    .info-section h2 {
+        color: var(--color-main-text);
+        font-size: 24px;
+        margin: 0 0 16px 0;
+        border: none;
+        padding: 0;
+    }
+
+    .info-section p {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 12px 0;
+    }
+
+    .info-section ul {
+        margin: 12px 0;
+        padding-left: 24px;
+    }
+
+    .info-section li {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 8px 0;
+    }
+
+    .info-section code {
+        background: var(--color-main-background);
+        padding: 2px 8px;
+        border-radius: var(--border-radius);
+        font-size: 13px;
+    }
+
+    .auth-status {
+        background: var(--color-primary-element-light);
+        border-left: 4px solid var(--color-primary-element);
+        padding: 16px 20px;
+        margin: 24px 0;
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .auth-status svg {
+        width: 24px;
+        height: 24px;
+        fill: var(--color-primary-element);
+        flex-shrink: 0;
+    }
+
+    .auth-status-text {
+        flex: 1;
+    }
+
+    .auth-status-text strong {
+        display: block;
+        color: var(--color-main-text);
+        font-size: 14px;
+        margin-bottom: 4px;
+    }
+
+    .auth-status-text span {
+        color: var(--color-text-maxcontrast);
+        font-size: 13px;
+    }
+{% endblock %}
+
+{% block content %}
+<div class="app-content-wrapper">
+    <!-- Main Content Area -->
+    <main id="app-content">
+        <div class="page-content">
+            <!-- Hero Section -->
+            <div class="hero-section">
+                <h1>Welcome to Nextcloud MCP Server</h1>
+                <p>
+                    Interactive user interface for semantic search and document retrieval.
+                    Test queries, visualize results, and explore your Nextcloud content using RAG workflows.
+                </p>
+            </div>
+
+            <!-- Authentication Status -->
+            <div class="auth-status">
+                <svg viewBox="0 0 24 24">
+                    <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                </svg>
+                <div class="auth-status-text">
+                    <strong>Authenticated as: {{ username }}</strong>
+                    <span>Authentication mode: <code>{{ auth_mode }}</code></span>
+                </div>
+            </div>
+
+            {% if vector_sync_enabled %}
+            <!-- Vector Sync Enabled Content -->
+            <div class="info-section">
+                <h2>About Semantic Search</h2>
+                <p>
+                    This interface provides access to <strong>semantic search</strong> capabilities powered by vector embeddings.
+                    Unlike traditional keyword search, semantic search understands the <em>meaning</em> of your queries and finds
+                    conceptually similar content across your Nextcloud apps.
+                </p>
+                <p>
+                    <strong>How it works:</strong>
+                </p>
+                <ul>
+                    <li>Documents from Notes, Calendar, Files, Contacts, and Deck are indexed into a vector database</li>
+                    <li>Each document chunk is converted to a 768-dimensional vector embedding that captures semantic meaning</li>
+                    <li>Queries are also converted to embeddings and matched against document vectors using similarity search</li>
+                    <li>Results can be retrieved using pure semantic search or hybrid BM25 search combining keywords and semantics</li>
+                </ul>
+            </div>
+
+            <div class="info-section">
+                <h2>RAG Workflow Integration</h2>
+                <p>
+                    This UI allows you to <strong>test the same queries that Large Language Models (LLMs) would use</strong> in a
+                    Retrieval-Augmented Generation (RAG) workflow. When an AI assistant needs to answer questions about your data:
+                </p>
+                <ul>
+                    <li><strong>Step 1:</strong> The assistant converts your question into a search query</li>
+                    <li><strong>Step 2:</strong> The MCP server retrieves relevant document chunks using semantic search</li>
+                    <li><strong>Step 3:</strong> Retrieved context is passed to the LLM to generate an informed answer</li>
+                </ul>
+
+                <!-- RAG Workflow Diagram -->
+                <div style="background: var(--color-main-background); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 24px; margin: 24px 0; font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace; font-size: 13px; line-height: 1.8; overflow-x: auto;">
+                    <div style="text-align: center; font-weight: 600; margin-bottom: 16px; color: var(--color-primary-element); font-size: 14px;">
+                        MCP Sampling RAG Workflow
+                    </div>
+                    <pre style="margin: 0; color: var(--color-main-text);">
+┌─────────────────┐
+│   <strong>MCP Client</strong>   │  User asks: "What are health benefits of coffee?"
+│  (Claude Code)  │
+└────────┬────────┘
+         │ (1) User question
+         ↓
+┌────────────────────────────────────────────────────────────────────────┐
+│                      <strong>Nextcloud MCP Server</strong>                          │
+│  ┌──────────────────────────────────────────────────────────────────┐  │
+│  │ <strong>nc_semantic_search_answer</strong> Tool (MCP Sampling-enabled)      │  │
+│  │                                                                  │  │
+│  │  (2) Semantic Search                                             │  │
+│  │  ┌────────────────────────────────────────────────────────┐     │  │
+│  │  │ Query: "health benefits of coffee"                     │     │  │
+│  │  │ → Convert to 768D vector embedding                     │     │  │
+│  │  │ → Search Qdrant (BM25 Hybrid + RRF fusion)             │     │  │
+│  │  │ → Retrieve top 5 relevant document chunks              │     │  │
+│  │  └────────────────────────────────────────────────────────┘     │  │
+│  │                                                                  │  │
+│  │  (3) Construct Prompt with Context                               │  │
+│  │  ┌────────────────────────────────────────────────────────┐     │  │
+│  │  │ "What are health benefits of coffee?                   │     │  │
+│  │  │                                                         │     │  │
+│  │  │  Documents:                                             │     │  │
+│  │  │  - [MED-2155] Effects of habitual coffee consumption...│     │  │
+│  │  │  - [MED-1646] Beverage consumption guidance...         │     │  │
+│  │  │  - [MED-1627] Coffee and depression risk...            │     │  │
+│  │  │  ...                                                    │     │  │
+│  │  │                                                         │     │  │
+│  │  │  Provide answer with citations."                        │     │  │
+│  │  └────────────────────────────────────────────────────────┘     │  │
+│  │                                                                  │  │
+│  │  (4) MCP Sampling Request                                        │  │
+│  │  ─────────────────────────────────────────────────────────────> │  │
+│  └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+         │
+         │ Sampling request with prompt + context
+         ↓
+┌─────────────────┐
+│   <strong>MCP Client</strong>   │  (5) Client's LLM generates answer using retrieved context
+│    (Claude)     │      → "Coffee consumption (2-3 cups/day) is associated with
+└────────┬────────┘         reduced risk of type 2 diabetes, cardiovascular disease,
+         │                  and improved liver health (Document 1, 2)..."
+         │
+         │ (6) Answer with citations
+         ↓
+┌─────────────────┐
+│      User       │  Receives comprehensive answer with source citations
+└─────────────────┘</pre>
+                </div>
+
+                <p style="margin-top: 16px;">
+                    <strong>Key Point:</strong> The MCP server retrieves context but doesn't generate answers itself.
+                    Through <strong>MCP sampling</strong>, it requests the client's LLM to generate responses, giving users
+                    full control over which model is used and ensuring all processing happens client-side.
+                </p>
+
+                <p>
+                    By using this interface, you can preview search results, understand relevance scores, and verify
+                    that the system retrieves the right information before it reaches the LLM.
+                </p>
+            </div>
+
+            <!-- Feature Cards -->
+            <h2>Available Features</h2>
+            <div class="feature-grid">
+                <a href="/app/user-info" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                        </svg>
+                    </div>
+                    <h3>User Information</h3>
+                    <p>
+                        View your authentication details, session information, and IdP profile.
+                        Manage background access permissions.
+                    </p>
+                </a>
+
+                <a href="/app/user-info#vector-sync" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                        </svg>
+                    </div>
+                    <h3>Vector Sync Status</h3>
+                    <p>
+                        Monitor real-time indexing progress with metrics for indexed documents, pending queue,
+                        and synchronization status.
+                    </p>
+                </a>
+
+                <a href="/app/user-info#vector-viz" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                        </svg>
+                    </div>
+                    <h3>Vector Visualization</h3>
+                    <p>
+                        Interactive search interface with 2D PCA visualization. Compare algorithms,
+                        view relevance scores, and explore matched document chunks.
+                    </p>
+                </a>
+            </div>
+
+            {% else %}
+            <!-- Vector Sync Disabled Content -->
+            <div class="warning">
+                <h3 style="margin-top: 0;">Vector Sync is Disabled</h3>
+                <p>
+                    Semantic search and vector visualization features are currently disabled.
+                    To enable these features, set <code>VECTOR_SYNC_ENABLED=true</code> in your environment configuration.
+                </p>
+                <p style="margin-bottom: 0;">
+                    <strong>Learn more:</strong>
+                    <a href="https://github.com/YOUR_REPO/docs/configuration.md" target="_blank" style="color: inherit; text-decoration: underline;">
+                        Configuration Guide
+                    </a>
+                </p>
+            </div>
+
+            <!-- Limited Feature Card -->
+            <h2>Available Features</h2>
+            <div class="feature-grid">
+                <a href="/app/user-info" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                        </svg>
+                    </div>
+                    <h3>User Information</h3>
+                    <p>
+                        View your authentication details, session information, and IdP profile.
+                        Manage background access permissions.
+                    </p>
+                </a>
+            </div>
+            {% endif %}
+
+            <!-- Documentation Section -->
+            <div class="info-section" style="margin-top: 40px;">
+                <h2>Documentation</h2>
+                <p>
+                    For detailed information about configuration, authentication modes, and advanced features,
+                    please refer to the project documentation:
+                </p>
+                <ul>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/installation.md" target="_blank">Installation Guide</a></li>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank">Configuration Options</a></li>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/authentication.md" target="_blank">Authentication Modes</a></li>
+                    {% if vector_sync_enabled %}
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/user-guide/vector-sync-ui.md" target="_blank">Vector Sync UI Guide</a></li>
+                    {% endif %}
+                </ul>
+            </div>
+        </div>
+    </main>
+</div>
+{% endblock %}
@@ -14,16 +14,16 @@ The Token Broker provides:
 - Session vs background token separation (RFC 8693)
 """

-import asyncio
 import logging
 from datetime import datetime, timedelta, timezone
 from typing import Dict, Optional, Tuple

+import anyio
 import httpx
 import jwt
 from cryptography.fernet import Fernet

-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
 from nextcloud_mcp_server.auth.token_exchange import exchange_token_for_delegation

 logger = logging.getLogger(__name__)
@@ -43,7 +43,7 @@ class TokenCache:
        self._cache: Dict[str, Tuple[str, datetime]] = {}
        self._ttl = timedelta(seconds=ttl_seconds)
        self._early_refresh = timedelta(seconds=early_refresh_seconds)
-        self._lock = asyncio.Lock()
+        self._lock = anyio.Lock()

    async def get(self, user_id: str) -> Optional[str]:
        """Get cached token if valid."""
@@ -20,7 +20,7 @@ import httpx
 import jwt

 from ..config import get_settings
-from .refresh_token_storage import RefreshTokenStorage
+from .storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -1,492 +0,0 @@
-"""Token verification using Nextcloud OIDC userinfo endpoint."""
-
-import logging
-import time
-from typing import Any
-
-import httpx
-import jwt
-from jwt import PyJWKClient
-from mcp.server.auth.provider import AccessToken, TokenVerifier
-
-logger = logging.getLogger(__name__)
-
-
-class NextcloudTokenVerifier(TokenVerifier):
-    """
-    Validates access tokens using JWT verification with JWKS or userinfo endpoint fallback.
-
-    This verifier supports both JWT and opaque tokens:
-    1. For JWT tokens: Verifies signature with JWKS and extracts scopes from payload
-    2. For opaque tokens: Falls back to userinfo endpoint validation
-    3. Caches successful responses to avoid repeated API calls/verifications
-
-    JWT validation provides:
-    - Faster validation (no HTTP call needed)
-    - Direct scope extraction from token payload
-    - Signature verification using JWKS
-
-    Userinfo fallback provides:
-    - Support for opaque tokens
-    - Backward compatibility
-    - Additional validation layer
-    """
-
-    def __init__(
-        self,
-        nextcloud_host: str,
-        userinfo_uri: str,
-        jwks_uri: str | None = None,
-        issuer: str | None = None,
-        introspection_uri: str | None = None,
-        client_id: str | None = None,
-        client_secret: str | None = None,
-        cache_ttl: int = 3600,
-    ):
-        """
-        Initialize the token verifier.
-
-        Args:
-            nextcloud_host: Base URL of the Nextcloud instance (e.g., https://cloud.example.com)
-            userinfo_uri: Full URL to the userinfo endpoint
-            jwks_uri: Full URL to the JWKS endpoint (for JWT verification)
-            issuer: Expected issuer claim value (for JWT verification)
-            introspection_uri: Full URL to the introspection endpoint (for opaque tokens)
-            client_id: OAuth client ID (required for introspection)
-            client_secret: OAuth client secret (required for introspection)
-            cache_ttl: Time-to-live for cached tokens in seconds (default: 3600)
-        """
-        self.nextcloud_host = nextcloud_host.rstrip("/")
-        self.userinfo_uri = userinfo_uri
-        self.jwks_uri = jwks_uri
-        self.issuer = issuer
-        self.introspection_uri = introspection_uri
-        self.client_id = client_id
-        self.client_secret = client_secret
-        self.cache_ttl = cache_ttl
-
-        # Cache: token -> (userinfo, expiry_timestamp)
-        self._token_cache: dict[str, tuple[dict[str, Any], float]] = {}
-
-        # HTTP client for userinfo/introspection requests
-        self._client = httpx.AsyncClient(timeout=10.0)
-
-        # PyJWKClient for JWT verification (lazy initialization)
-        self._jwks_client: PyJWKClient | None = None
-        if jwks_uri:
-            logger.info(f"JWT verification enabled with JWKS URI: {jwks_uri}")
-            self._jwks_client = PyJWKClient(jwks_uri, cache_keys=True)
-
-        # Introspection support
-        if introspection_uri and client_id and client_secret:
-            logger.info(f"Token introspection enabled: {introspection_uri}")
-        elif introspection_uri:
-            logger.warning(
-                "Introspection URI provided but missing client credentials - introspection disabled"
-            )
-
-    async def verify_token(self, token: str) -> AccessToken | None:
-        """
-        Verify a bearer token using JWT verification, introspection, or userinfo endpoint.
-
-        This method:
-        1. Checks the cache first for recent validations
-        2. Attempts JWT verification if JWKS is configured and token looks like JWT
-        3. Falls back to introspection for opaque tokens (if configured)
-        4. Falls back to userinfo endpoint as last resort
-        5. Returns AccessToken with username and scopes
-
-        Args:
-            token: The bearer token to verify
-
-        Returns:
-            AccessToken if valid, None if invalid or expired
-        """
-        # Check cache first
-        cached = self._get_cached_token(token)
-        if cached:
-            logger.debug("Token found in cache")
-            return cached
-
-        # Try JWT verification first if enabled and token looks like JWT
-        is_jwt_format = self._is_jwt_format(token)
-        logger.debug(
-            f"Token format check: is_jwt_format={is_jwt_format}, _jwks_client={self._jwks_client is not None}"
-        )
-        if self._jwks_client and is_jwt_format:
-            logger.debug("Attempting JWT verification...")
-            jwt_result = self._verify_jwt(token)
-            if jwt_result:
-                logger.info("Token validated via JWT verification")
-                return jwt_result
-            else:
-                logger.warning("JWT verification failed, will try other methods")
-
-        # For opaque tokens, try introspection if available
-        if self.introspection_uri and self.client_id and self.client_secret:
-            logger.debug("Attempting token introspection...")
-            try:
-                introspection_result = await self._verify_via_introspection(token)
-                if introspection_result:
-                    logger.info("Token validated via introspection")
-                    return introspection_result
-            except Exception as e:
-                logger.warning(f"Introspection failed: {e}")
-
-        # Fall back to userinfo endpoint validation (last resort)
-        logger.debug("Attempting userinfo endpoint validation...")
-        try:
-            return await self._verify_via_userinfo(token)
-        except Exception as e:
-            logger.warning(f"Token verification failed: {e}")
-            return None
-
-    def _is_jwt_format(self, token: str) -> bool:
-        """
-        Check if token looks like a JWT (has 3 parts separated by dots).
-
-        Args:
-            token: The token to check
-
-        Returns:
-            True if token appears to be JWT format
-        """
-        return "." in token and token.count(".") == 2
-
-    def _verify_jwt(self, token: str) -> AccessToken | None:
-        """
-        Verify JWT token with signature validation using JWKS.
-
-        Args:
-            token: The JWT token to verify
-
-        Returns:
-            AccessToken if valid, None if invalid
-        """
-        try:
-            # Get signing key from JWKS
-            assert self._jwks_client is not None  # Caller should check before calling
-            signing_key = self._jwks_client.get_signing_key_from_jwt(token)
-
-            # Verify and decode JWT
-            # Accept tokens with audience: "mcp-server" or ["mcp-server", "nextcloud"]
-            # This allows:
-            # 1. Tokens from MCP clients (aud: "mcp-server")
-            # 2. Tokens for Nextcloud APIs (aud: "nextcloud")
-            # 3. Tokens for both (aud: ["mcp-server", "nextcloud"])
-            payload = jwt.decode(
-                token,
-                signing_key.key,
-                algorithms=["RS256"],
-                issuer=self.issuer,
-                audience=["mcp-server", "nextcloud"],  # Accept either audience
-                options={
-                    "verify_signature": True,
-                    "verify_exp": True,
-                    "verify_iat": True,
-                    "verify_iss": True if self.issuer else False,
-                    "verify_aud": True,  # Enable audience validation
-                },
-            )
-
-            logger.debug(f"JWT verified successfully for user: {payload.get('sub')}")
-            logger.debug(f"Full JWT payload: {payload}")
-
-            # Extract username (sub claim, with fallback to preferred_username)
-            # Some OIDC providers (like Keycloak) may not include sub in access tokens
-            username = payload.get("sub") or payload.get("preferred_username")
-            if not username:
-                logger.error(
-                    "No 'sub' or 'preferred_username' claim found in JWT payload"
-                )
-                return None
-
-            # Extract scopes from scope claim (space-separated string)
-            scope_string = payload.get("scope", "")
-            scopes = scope_string.split() if scope_string else []
-            logger.debug(
-                f"Extracted scopes from JWT - scope claim: '{scope_string}' -> scopes list: {scopes}"
-            )
-
-            # Extract expiration
-            exp = payload.get("exp")
-            if not exp:
-                logger.warning("No 'exp' claim in JWT, using default TTL")
-                exp = int(time.time() + self.cache_ttl)
-
-            # Cache the result
-            userinfo = {
-                "sub": username,
-                "scope": scope_string,
-                **{k: v for k, v in payload.items() if k not in ["sub", "scope"]},
-            }
-            self._token_cache[token] = (userinfo, exp)
-
-            return AccessToken(
-                token=token,
-                client_id=payload.get("client_id", ""),
-                scopes=scopes,
-                expires_at=exp,
-                resource=username,  # Store username in resource field (RFC 8707)
-            )
-
-        except jwt.ExpiredSignatureError:
-            logger.info("JWT token has expired")
-            return None
-        except jwt.InvalidIssuerError as e:
-            logger.warning(f"JWT issuer validation failed: {e}")
-            return None
-        except jwt.InvalidTokenError as e:
-            logger.warning(f"JWT validation failed: {e}")
-            return None
-        except Exception as e:
-            logger.error(f"Unexpected error during JWT verification: {e}")
-            return None
-
-    async def _verify_via_introspection(self, token: str) -> AccessToken | None:
-        """
-        Validate token by calling the introspection endpoint (RFC 7662).
-
-        This method validates opaque tokens and retrieves their scopes.
-
-        Args:
-            token: The bearer token to introspect
-
-        Returns:
-            AccessToken if active, None if inactive or invalid
-        """
-        try:
-            # Introspection requires client authentication
-            response = await self._client.post(
-                self.introspection_uri,  # type: ignore
-                data={"token": token},
-                auth=(self.client_id, self.client_secret),
-            )
-
-            if response.status_code == 200:
-                introspection_data = response.json()
-
-                # Check if token is active
-                if not introspection_data.get("active", False):
-                    logger.info("Token introspection returned inactive=false")
-                    return None
-
-                logger.debug(
-                    f"Token introspected successfully for user: {introspection_data.get('sub')}"
-                )
-
-                # Extract username
-                username = introspection_data.get("sub") or introspection_data.get(
-                    "username"
-                )
-                if not username:
-                    logger.error("No username found in introspection response")
-                    return None
-
-                # Extract scopes (space-separated string)
-                scope_string = introspection_data.get("scope", "")
-                scopes = scope_string.split() if scope_string else []
-                logger.debug(f"Extracted scopes from introspection: {scopes}")
-
-                # Extract expiration
-                exp = introspection_data.get("exp")
-                if exp:
-                    expiry = float(exp)
-                else:
-                    logger.warning(
-                        "No 'exp' in introspection response, using default TTL"
-                    )
-                    expiry = time.time() + self.cache_ttl
-
-                # Cache the result
-                cache_data = {
-                    "sub": username,
-                    "scope": scope_string,
-                    **{
-                        k: v
-                        for k, v in introspection_data.items()
-                        if k not in ["sub", "scope", "active"]
-                    },
-                }
-                self._token_cache[token] = (cache_data, expiry)
-
-                return AccessToken(
-                    token=token,
-                    client_id=introspection_data.get("client_id", ""),
-                    scopes=scopes,
-                    expires_at=int(expiry),
-                    resource=username,
-                )
-
-            elif response.status_code in (400, 401, 403):
-                logger.warning(
-                    f"Token introspection failed: HTTP {response.status_code}. "
-                    f"This may indicate: (1) Client credentials mismatch - trying to introspect "
-                    f"token issued to different OAuth client, (2) Expired client credentials, "
-                    f"(3) Invalid token. Will fall back to userinfo endpoint. "
-                    f"Response: {response.text[:200] if response.text else 'empty'}"
-                )
-                return None
-            else:
-                logger.warning(
-                    f"Unexpected response from introspection: {response.status_code}. "
-                    f"Response: {response.text[:200] if response.text else 'empty'}"
-                )
-                return None
-
-        except httpx.TimeoutException:
-            logger.error("Timeout while introspecting token")
-            return None
-        except httpx.RequestError as e:
-            logger.error(f"Network error while introspecting token: {e}")
-            return None
-        except Exception as e:
-            logger.error(f"Unexpected error during token introspection: {e}")
-            return None
-
-    async def _verify_via_userinfo(self, token: str) -> AccessToken | None:
-        """
-        Validate token by calling the userinfo endpoint.
-
-        Args:
-            token: The bearer token to verify
-
-        Returns:
-            AccessToken if valid, None otherwise
-        """
-        try:
-            response = await self._client.get(
-                self.userinfo_uri, headers={"Authorization": f"Bearer {token}"}
-            )
-
-            if response.status_code == 200:
-                userinfo = response.json()
-                logger.debug(
-                    f"Token validated successfully for user: {userinfo.get('sub')}"
-                )
-
-                # Cache the result
-                expiry = time.time() + self.cache_ttl
-                self._token_cache[token] = (userinfo, expiry)
-
-                # Create AccessToken with username in resource field (workaround for MCP SDK)
-                username = userinfo.get("sub") or userinfo.get("preferred_username")
-                if not username:
-                    logger.error("No username found in userinfo response")
-                    return None
-
-                return AccessToken(
-                    token=token,
-                    client_id="",  # Not available from userinfo
-                    scopes=self._extract_scopes(userinfo),
-                    expires_at=int(expiry),
-                    resource=username,  # Store username in resource field (RFC 8707)
-                )
-
-            elif response.status_code in (400, 401, 403):
-                logger.info(f"Token validation failed: HTTP {response.status_code}")
-                return None
-            else:
-                logger.warning(
-                    f"Unexpected response from userinfo: {response.status_code}"
-                )
-                return None
-
-        except httpx.TimeoutException:
-            logger.error("Timeout while validating token via userinfo endpoint")
-            return None
-        except httpx.RequestError as e:
-            logger.error(f"Network error while validating token: {e}")
-            return None
-        except Exception as e:
-            logger.error(f"Unexpected error during token validation: {e}")
-            return None
-
-    def _get_cached_token(self, token: str) -> AccessToken | None:
-        """
-        Retrieve a token from cache if not expired.
-
-        Args:
-            token: The bearer token to look up
-
-        Returns:
-            AccessToken if cached and valid, None otherwise
-        """
-        if token not in self._token_cache:
-            return None
-
-        userinfo, expiry = self._token_cache[token]
-
-        # Check if expired
-        if time.time() >= expiry:
-            logger.debug("Cached token expired, removing from cache")
-            del self._token_cache[token]
-            return None
-
-        # Return cached AccessToken
-        username = userinfo.get("sub") or userinfo.get("preferred_username")
-        return AccessToken(
-            token=token,
-            client_id="",
-            scopes=self._extract_scopes(userinfo),
-            expires_at=int(expiry),
-            resource=username,
-        )
-
-    def _extract_scopes(self, userinfo: dict[str, Any]) -> list[str]:
-        """
-        Extract scopes from userinfo response.
-
-        First attempts to read actual scopes from the 'scope' field (RFC 8693).
-        If not present, infers scopes from the claims present in the response.
-
-        Args:
-            userinfo: The userinfo response dictionary
-
-        Returns:
-            List of scopes (actual or inferred)
-        """
-        # Try to get actual scopes from userinfo response (if OIDC provider includes it)
-        scope_string = userinfo.get("scope")
-        if scope_string:
-            scopes = scope_string.split() if isinstance(scope_string, str) else []
-            if scopes:
-                logger.debug(
-                    f"Using actual scopes from userinfo: {scopes} (scope field present)"
-                )
-                return scopes
-
-        # Fallback: Infer scopes from claims present in response
-        # This maintains backward compatibility with OIDC providers that don't
-        # include the scope field in userinfo responses
-        logger.debug(
-            "No scope field in userinfo response, inferring scopes from claims"
-        )
-        scopes = ["openid"]  # Always present
-
-        if "email" in userinfo:
-            scopes.append("email")
-
-        if any(
-            key in userinfo for key in ["name", "given_name", "family_name", "picture"]
-        ):
-            scopes.append("profile")
-
-        if "roles" in userinfo:
-            scopes.append("roles")
-
-        if "groups" in userinfo:
-            scopes.append("groups")
-
-        logger.debug(f"Inferred scopes from userinfo claims: {scopes}")
-        return scopes
-
-    def clear_cache(self):
-        """Clear the token cache."""
-        self._token_cache.clear()
-        logger.debug("Token cache cleared")
-
-    async def close(self):
-        """Cleanup resources."""
-        await self._client.aclose()
-        logger.debug("Token verifier closed")
@@ -0,0 +1,442 @@
+"""
+Unified Token Verifier for ADR-005 Token Audience Validation.
+
+This module replaces both NextcloudTokenVerifier and ProgressiveConsentTokenVerifier
+with a single implementation that supports two compliant OAuth modes:
+
+1. Multi-audience mode (default): Validates MCP audience per RFC 7519 (resource servers
+   validate only their own audience). Nextcloud independently validates its own audience.
+2. Token exchange mode (opt-in): Tokens have MCP audience only, exchanged for Nextcloud tokens
+
+Key Design Principles:
+- Token verification happens HERE (validates MCP audience per OAuth spec)
+- Token exchange happens in context_helper.py (when creating NextcloudClient)
+- No token passthrough allowed (complies with MCP Security Specification)
+- Token reuse IS allowed for multi-audience tokens (RFC 8707)
+"""
+
+import hashlib
+import logging
+import time
+from typing import Any
+
+import httpx
+import jwt
+from jwt import PyJWKClient
+from mcp.server.auth.provider import AccessToken, TokenVerifier
+
+from nextcloud_mcp_server.config import Settings
+from nextcloud_mcp_server.observability.metrics import (
+    oauth_token_cache_hits_total,
+    record_oauth_token_validation,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class UnifiedTokenVerifier(TokenVerifier):
+    """
+    Unified token verifier supporting both multi-audience and token exchange modes.
+    Compliant with MCP security specification - no token pass-through.
+
+    This verifier:
+    1. Validates tokens using JWT verification with JWKS or introspection fallback
+    2. Enforces proper audience validation based on configured mode
+    3. Caches successful validations to avoid repeated API calls
+
+    Mode Selection (via ENABLE_TOKEN_EXCHANGE setting):
+    - False/omit (default): Multi-audience mode - validates MCP audience only (per RFC 7519).
+      Nextcloud independently validates its own audience when receiving API calls.
+    - True: Exchange mode - requires MCP audience only, then exchanges for Nextcloud token
+    """
+
+    def __init__(self, settings: Settings):
+        """
+        Initialize the unified token verifier.
+
+        Args:
+            settings: Application settings containing OAuth configuration
+        """
+        self.settings = settings
+        self.mode = "exchange" if settings.enable_token_exchange else "multi-audience"
+
+        # Common components for all modes
+        self.http_client = httpx.AsyncClient(timeout=10.0)
+
+        # JWT verification support
+        self.jwks_client: PyJWKClient | None = None
+        if hasattr(settings, "jwks_uri") and settings.jwks_uri:
+            logger.info(f"JWT verification enabled with JWKS URI: {settings.jwks_uri}")
+            self.jwks_client = PyJWKClient(settings.jwks_uri, cache_keys=True)
+
+        # Introspection support (for opaque tokens)
+        self.introspection_uri: str | None = None
+        if (
+            hasattr(settings, "introspection_uri")
+            and settings.introspection_uri
+            and settings.oidc_client_id
+            and settings.oidc_client_secret
+        ):
+            self.introspection_uri = settings.introspection_uri
+            logger.info(f"Token introspection enabled: {self.introspection_uri}")
+
+        # Token cache: token_hash -> (userinfo, expiry_timestamp)
+        self._token_cache: dict[str, tuple[dict[str, Any], float]] = {}
+        self.cache_ttl = 3600  # 1 hour default
+
+        logger.info(
+            f"UnifiedTokenVerifier initialized in {self.mode} mode. "
+            f"MCP audience: {settings.oidc_client_id} or {settings.nextcloud_mcp_server_url}, "
+            f"Nextcloud resource URI: {settings.nextcloud_resource_uri}"
+        )
+
+    async def verify_token(self, token: str) -> AccessToken | None:
+        """
+        Verify token according to MCP TokenVerifier protocol.
+
+        Per RFC 7519, we validate only MCP audience. The mode determines what
+        happens AFTER verification in context_helper.py:
+        - Multi-audience mode: Use token directly (Nextcloud validates its own audience)
+        - Exchange mode: Exchange for Nextcloud-audience token via RFC 8693
+
+        Args:
+            token: Bearer token to verify
+
+        Returns:
+            AccessToken if valid with MCP audience, None otherwise
+        """
+        # Check cache first
+        cached = self._get_cached_token(token)
+        if cached:
+            logger.debug("Token found in cache")
+            oauth_token_cache_hits_total.labels(hit="true").inc()
+            return cached
+
+        oauth_token_cache_hits_total.labels(hit="false").inc()
+
+        # Both modes do the same validation (MCP audience only)
+        return await self._verify_mcp_audience(token)
+
+    async def _verify_mcp_audience(self, token: str) -> AccessToken | None:
+        """
+        Validate token has MCP audience.
+
+        Per RFC 7519 Section 4.1.3, resource servers validate only their own
+        presence in the audience claim. We don't validate Nextcloud's audience -
+        that's Nextcloud's responsibility when it receives the token.
+
+        Args:
+            token: Bearer token to verify
+
+        Returns:
+            AccessToken if valid with MCP audience, None otherwise
+        """
+        validation_method = "unknown"
+        try:
+            # Attempt JWT verification first
+            if self._is_jwt_format(token) and self.jwks_client:
+                validation_method = "jwt"
+                payload = await self._verify_jwt_signature(token)
+                if payload:
+                    record_oauth_token_validation("jwt", "valid")
+                else:
+                    record_oauth_token_validation("jwt", "invalid")
+            else:
+                # Fall back to introspection for opaque tokens
+                validation_method = "introspect"
+                payload = await self._introspect_token(token)
+                if payload:
+                    record_oauth_token_validation("introspect", "valid")
+                else:
+                    record_oauth_token_validation("introspect", "invalid")
+                if not payload:
+                    return None
+
+            # Check payload is valid
+            if not payload:
+                return None
+
+            # Validate MCP audience is present
+            if not self._has_mcp_audience(payload):
+                audiences = payload.get("aud", [])
+                logger.error(
+                    f"Token rejected: Missing MCP audience. "
+                    f"Got {audiences}, need MCP ({self.settings.oidc_client_id} or "
+                    f"{self.settings.nextcloud_mcp_server_url})"
+                )
+                # Record as invalid due to audience mismatch
+                record_oauth_token_validation(validation_method, "invalid")
+                return None
+
+            # Log based on mode for clarity
+            if self.mode == "multi-audience":
+                logger.info(
+                    "MCP audience validated - token can be used directly "
+                    "(Nextcloud will validate its own audience)"
+                )
+            else:
+                logger.info(
+                    "MCP audience validated - token will be exchanged for Nextcloud access"
+                )
+
+            return self._create_access_token(token, payload)
+
+        except Exception as e:
+            logger.error(f"Token verification failed: {e}")
+            record_oauth_token_validation(validation_method, "error")
+            return None
+
+    def _has_mcp_audience(self, payload: dict[str, Any]) -> bool:
+        """
+        Check if token has MCP audience.
+
+        Per RFC 7519 Section 4.1.3, resource servers should only validate their own
+        presence in the audience claim. We don't validate Nextcloud's audience - that's
+        Nextcloud's responsibility when it receives the token.
+
+        Args:
+            payload: Decoded token payload
+
+        Returns:
+            True if MCP audience present, False otherwise
+        """
+        audiences = payload.get("aud", [])
+        if isinstance(audiences, str):
+            audiences = [audiences]
+
+        audiences_set = set(audiences)
+
+        # MCP must have at least one: client_id OR server_url OR server_url/mcp
+        return bool(
+            self.settings.oidc_client_id in audiences_set
+            or (
+                self.settings.nextcloud_mcp_server_url
+                and (
+                    self.settings.nextcloud_mcp_server_url in audiences_set
+                    or f"{self.settings.nextcloud_mcp_server_url}/mcp" in audiences_set
+                )
+            )
+        )
+
+    def _is_jwt_format(self, token: str) -> bool:
+        """
+        Check if token looks like a JWT (has 3 parts separated by dots).
+
+        Args:
+            token: The token to check
+
+        Returns:
+            True if token appears to be JWT format
+        """
+        return "." in token and token.count(".") == 2
+
+    async def _verify_jwt_signature(self, token: str) -> dict[str, Any] | None:
+        """
+        Verify JWT token with signature validation using JWKS.
+
+        Args:
+            token: JWT token to verify
+
+        Returns:
+            Decoded payload if valid, None if invalid
+        """
+        try:
+            assert self.jwks_client is not None  # Caller should check before calling
+
+            # Get signing key from JWKS
+            signing_key = self.jwks_client.get_signing_key_from_jwt(token)
+
+            # Verify and decode JWT
+            # Note: We don't validate audience here - that's done separately based on mode
+            payload = jwt.decode(
+                token,
+                signing_key.key,
+                algorithms=["RS256"],
+                issuer=(
+                    self.settings.oidc_issuer
+                    if hasattr(self.settings, "oidc_issuer")
+                    else None
+                ),
+                options={
+                    "verify_signature": True,
+                    "verify_exp": True,
+                    "verify_iat": True,
+                    "verify_iss": (
+                        True
+                        if hasattr(self.settings, "oidc_issuer")
+                        and self.settings.oidc_issuer
+                        else False
+                    ),
+                    "verify_aud": False,  # We handle audience validation separately
+                },
+            )
+
+            logger.debug(f"JWT signature verified for user: {payload.get('sub')}")
+            return payload
+
+        except jwt.ExpiredSignatureError:
+            logger.info("JWT token has expired")
+            return None
+        except jwt.InvalidIssuerError as e:
+            logger.warning(f"JWT issuer validation failed: {e}")
+            return None
+        except jwt.InvalidTokenError as e:
+            logger.warning(f"JWT validation failed: {e}")
+            return None
+        except Exception as e:
+            logger.error(f"Unexpected error during JWT verification: {e}")
+            return None
+
+    async def _introspect_token(self, token: str) -> dict[str, Any] | None:
+        """
+        Validate token by calling the introspection endpoint (RFC 7662).
+
+        Args:
+            token: Bearer token to introspect
+
+        Returns:
+            Token payload if active, None if inactive or invalid
+        """
+        if not self.introspection_uri:
+            logger.debug("No introspection endpoint configured")
+            return None
+
+        try:
+            # Introspection requires client authentication
+            response = await self.http_client.post(
+                self.introspection_uri,
+                data={"token": token},
+                auth=(self.settings.oidc_client_id, self.settings.oidc_client_secret),
+            )
+
+            if response.status_code == 200:
+                introspection_data = response.json()
+
+                # Check if token is active
+                if not introspection_data.get("active", False):
+                    logger.info("Token introspection returned inactive=false")
+                    return None
+
+                logger.debug(
+                    f"Token introspected successfully for user: {introspection_data.get('sub')}"
+                )
+                return introspection_data
+
+            elif response.status_code in (400, 401, 403):
+                logger.warning(
+                    f"Token introspection failed: HTTP {response.status_code}. "
+                    f"Response: {response.text[:200] if response.text else 'empty'}"
+                )
+                return None
+            else:
+                logger.warning(
+                    f"Unexpected response from introspection: {response.status_code}. "
+                    f"Response: {response.text[:200] if response.text else 'empty'}"
+                )
+                return None
+
+        except httpx.TimeoutException:
+            logger.error("Timeout while introspecting token")
+            return None
+        except httpx.RequestError as e:
+            logger.error(f"Network error while introspecting token: {e}")
+            return None
+        except Exception as e:
+            logger.error(f"Unexpected error during token introspection: {e}")
+            return None
+
+    def _create_access_token(
+        self, token: str, payload: dict[str, Any]
+    ) -> AccessToken | None:
+        """
+        Create AccessToken object from validated token payload.
+
+        Args:
+            token: The bearer token
+            payload: Validated token payload
+
+        Returns:
+            AccessToken object or None if required fields missing
+        """
+        # Extract username (sub claim, with fallback to preferred_username)
+        username = payload.get("sub") or payload.get("preferred_username")
+        if not username:
+            logger.error(
+                "No 'sub' or 'preferred_username' claim found in token payload"
+            )
+            return None
+
+        # Extract scopes from scope claim (space-separated string)
+        scope_string = payload.get("scope", "")
+        scopes = scope_string.split() if scope_string else []
+        logger.debug(
+            f"Extracted scopes from token - scope claim: '{scope_string}' -> scopes list: {scopes}"
+        )
+
+        # Extract expiration
+        exp = payload.get("exp")
+        if not exp:
+            logger.warning("No 'exp' claim in token, using default TTL")
+            exp = int(time.time() + self.cache_ttl)
+
+        # Cache the result
+        token_hash = hashlib.sha256(token.encode()).hexdigest()
+        userinfo = {
+            "sub": username,
+            "scope": scope_string,
+            **{k: v for k, v in payload.items() if k not in ["sub", "scope"]},
+        }
+        self._token_cache[token_hash] = (userinfo, exp)
+
+        return AccessToken(
+            token=token,
+            client_id=payload.get("client_id", ""),
+            scopes=scopes,
+            expires_at=exp,
+            resource=username,  # Store username in resource field (RFC 8707)
+        )
+
+    def _get_cached_token(self, token: str) -> AccessToken | None:
+        """
+        Retrieve a token from cache if not expired.
+
+        Args:
+            token: The bearer token to look up
+
+        Returns:
+            AccessToken if cached and valid, None otherwise
+        """
+        token_hash = hashlib.sha256(token.encode()).hexdigest()
+        if token_hash not in self._token_cache:
+            return None
+
+        userinfo, expiry = self._token_cache[token_hash]
+
+        # Check if expired
+        if time.time() >= expiry:
+            logger.debug("Cached token expired, removing from cache")
+            del self._token_cache[token_hash]
+            return None
+
+        # Return cached AccessToken
+        username = userinfo.get("sub") or userinfo.get("preferred_username")
+        scope_string = userinfo.get("scope", "")
+        scopes = scope_string.split() if scope_string else []
+
+        return AccessToken(
+            token=token,
+            client_id=userinfo.get("client_id", ""),
+            scopes=scopes,
+            expires_at=int(expiry),
+            resource=username,
+        )
+
+    def clear_cache(self):
+        """Clear the token cache."""
+        self._token_cache.clear()
+        logger.debug("Token cache cleared")
+
+    async def close(self):
+        """Cleanup resources."""
+        await self.http_client.aclose()
+        logger.debug("Unified token verifier closed")
@@ -9,15 +9,217 @@ For OAuth mode: Requires browser-based OAuth login to establish session.

 import logging
 import os
+from pathlib import Path
 from typing import Any

 import httpx
+from jinja2 import Environment, FileSystemLoader
 from starlette.authentication import requires
 from starlette.requests import Request
 from starlette.responses import HTMLResponse, JSONResponse

+from nextcloud_mcp_server.client import NextcloudClient
+
 logger = logging.getLogger(__name__)

+# Setup Jinja2 environment for templates
+_template_dir = Path(__file__).parent / "templates"
+_jinja_env = Environment(loader=FileSystemLoader(_template_dir))
+
+
+async def _get_authenticated_client_for_userinfo(request: Request) -> NextcloudClient:
+    """Get an authenticated Nextcloud client for user info page operations.
+
+    This is a shared helper for authenticated routes that need to access
+    Nextcloud APIs. It handles both BasicAuth and OAuth authentication modes.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        Authenticated NextcloudClient
+
+    Raises:
+        RuntimeError: If credentials/session not configured
+    """
+    oauth_ctx = getattr(request.app.state, "oauth_context", None)
+
+    # BasicAuth mode - use credentials from environment
+    if not oauth_ctx:
+        nextcloud_host = os.getenv("NEXTCLOUD_HOST")
+        username = os.getenv("NEXTCLOUD_USERNAME")
+        password = os.getenv("NEXTCLOUD_PASSWORD")
+
+        if not all([nextcloud_host, username, password]):
+            raise RuntimeError("BasicAuth credentials not configured")
+
+        from httpx import BasicAuth
+
+        assert nextcloud_host is not None
+        assert username is not None
+        assert password is not None
+        return NextcloudClient(
+            base_url=nextcloud_host,
+            username=username,
+            auth=BasicAuth(username, password),
+        )
+
+    # OAuth mode - get token from session
+    storage = oauth_ctx.get("storage")
+    session_id = request.cookies.get("mcp_session")
+
+    if not storage or not session_id:
+        raise RuntimeError("Session not found")
+
+    token_data = await storage.get_refresh_token(session_id)
+    if not token_data or "access_token" not in token_data:
+        raise RuntimeError("No access token found in session")
+
+    access_token = token_data["access_token"]
+    username = token_data.get("username")
+    nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+    if not nextcloud_host or not username:
+        raise RuntimeError("Nextcloud host or username not configured")
+
+    return NextcloudClient.from_token(
+        base_url=nextcloud_host, token=access_token, username=username
+    )
+
+
+async def _get_processing_status(request: Request) -> dict[str, Any] | None:
+    """Get vector sync processing status.
+
+    Returns processing status information including indexed count, pending count,
+    and sync status. Only available when VECTOR_SYNC_ENABLED=true.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        Dictionary with processing status, or None if vector sync is disabled
+        or components are unavailable:
+        {
+            "indexed_count": int,  # Number of documents in Qdrant
+            "pending_count": int,  # Number of documents in queue
+            "status": str,  # "syncing" or "idle"
+        }
+    """
+    # Check if vector sync is enabled
+    vector_sync_enabled = os.getenv("VECTOR_SYNC_ENABLED", "false").lower() == "true"
+    if not vector_sync_enabled:
+        return None
+
+    try:
+        # Get document receive stream from app state
+        document_receive_stream = getattr(
+            request.app.state, "document_receive_stream", None
+        )
+        if document_receive_stream is None:
+            logger.debug("document_receive_stream not available in app state")
+            return None
+
+        # Get pending count from stream statistics
+        stats = document_receive_stream.statistics()
+        pending_count = stats.current_buffer_used
+
+        # Get Qdrant client and query indexed count
+        indexed_count = 0
+        try:
+            from nextcloud_mcp_server.config import get_settings
+            from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+            settings = get_settings()
+            qdrant_client = await get_qdrant_client()
+
+            # Count documents in collection
+            count_result = await qdrant_client.count(
+                collection_name=settings.get_collection_name()
+            )
+            indexed_count = count_result.count
+
+        except Exception as e:
+            logger.warning(f"Failed to query Qdrant for indexed count: {e}")
+            # Continue with indexed_count = 0
+
+        # Determine status
+        status = "syncing" if pending_count > 0 else "idle"
+
+        return {
+            "indexed_count": indexed_count,
+            "pending_count": pending_count,
+            "status": status,
+        }
+
+    except Exception as e:
+        logger.error(f"Error getting processing status: {e}")
+        return None
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_sync_status_fragment(request: Request) -> HTMLResponse:
+    """Vector sync status fragment endpoint - returns HTML fragment with current status.
+
+    This endpoint is polled by htmx to provide real-time updates of vector sync processing
+    status without requiring a full page refresh.
+
+    Requires authentication via session cookie (redirects to oauth_login route if not authenticated).
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML response with vector sync status table fragment
+    """
+    processing_status = await _get_processing_status(request)
+
+    # If vector sync is disabled or unavailable, return empty fragment
+    if not processing_status:
+        return HTMLResponse(
+            """
+            <div id="vector-sync-status" hx-get="/app/vector-sync/status" hx-trigger="every 10s" hx-swap="innerHTML">
+                <p style="color: #999;">Vector sync not available</p>
+            </div>
+            """
+        )
+
+    indexed_count = processing_status["indexed_count"]
+    pending_count = processing_status["pending_count"]
+    status = processing_status["status"]
+
+    # Format numbers with commas for readability
+    indexed_count_str = f"{indexed_count:,}"
+    pending_count_str = f"{pending_count:,}"
+
+    # Status badge color and text
+    if status == "syncing":
+        status_badge = (
+            '<span style="color: #ff9800; font-weight: bold;">⟳ Syncing</span>'
+        )
+    else:
+        status_badge = '<span style="color: #4caf50; font-weight: bold;">✓ Idle</span>'
+
+    # Return inner content only (container div is in initial page render)
+    html = f"""
+    <h2>Vector Sync Status</h2>
+    <table>
+        <tr>
+            <td><strong>Indexed Documents</strong></td>
+            <td>{indexed_count_str}</td>
+        </tr>
+        <tr>
+            <td><strong>Pending Documents</strong></td>
+            <td>{pending_count_str}</td>
+        </tr>
+        <tr>
+            <td><strong>Status</strong></td>
+            <td>{status_badge}</td>
+        </tr>
+    </table>
+    """
+
+    return HTMLResponse(html)
+

 async def _get_userinfo_endpoint(oauth_ctx: dict[str, Any]) -> str | None:
    """Get the correct userinfo endpoint based on OAuth mode.
@@ -141,9 +343,23 @@ async def _get_user_info(request: Request) -> dict[str, Any]:

    try:
        # Check if background access was granted (refresh token exists)
+        # This works for both Flow 2 (elicitation) and browser login
        token_data = await storage.get_refresh_token(session_id)
        background_access_granted = token_data is not None

+        # Build background access details
+        background_access_details = None
+        if token_data:
+            background_access_details = {
+                "flow_type": token_data.get("flow_type", "unknown"),
+                "provisioned_at": token_data.get("provisioned_at", "unknown"),
+                "provisioning_client_id": token_data.get(
+                    "provisioning_client_id", "N/A"
+                ),
+                "scopes": token_data.get("scopes", "N/A"),
+                "token_audience": token_data.get("token_audience", "unknown"),
+            }
+
        # Retrieve cached user profile (no token operations!)
        profile_data = await storage.get_user_profile(session_id)

@@ -153,6 +369,7 @@ async def _get_user_info(request: Request) -> dict[str, Any]:
            "auth_mode": "oauth",
            "session_id": session_id[:16] + "...",  # Truncated for security
            "background_access_granted": background_access_granted,
+            "background_access_details": background_access_details,
        }

        # Include cached profile if available
@@ -209,57 +426,36 @@ async def user_info_html(request: Request) -> HTMLResponse:
    """
    user_context = await _get_user_info(request)

+    # Get vector sync processing status
+    processing_status = await _get_processing_status(request)
+
+    # Check if user is admin (for Webhooks tab)
+    is_admin = False
+    try:
+        from nextcloud_mcp_server.auth.permissions import is_nextcloud_admin
+
+        # Get authenticated Nextcloud client
+        nc_client = await _get_authenticated_client_for_userinfo(request)
+        is_admin = await is_nextcloud_admin(request, nc_client._client)
+        await nc_client.close()
+    except Exception as e:
+        logger.warning(f"Failed to check admin status: {e}")
+        # Default to not admin if check fails
+
    # Check for error
    if "error" in user_context and user_context["error"] != "":
        # Get login URL dynamically
        oauth_ctx = getattr(request.app.state, "oauth_context", None)
        login_url = str(request.url_for("oauth_login")) if oauth_ctx else "/oauth/login"

-        error_html = f"""
-        <!DOCTYPE html>
-        <html lang="en">
-        <head>
-            <meta charset="UTF-8">
-            <meta name="viewport" content="width=device-width, initial-scale=1.0">
-            <title>Error - Nextcloud MCP Server</title>
-            <style>
-                body {{
-                    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
-                    max-width: 800px;
-                    margin: 50px auto;
-                    padding: 20px;
-                    background-color: #f5f5f5;
-                }}
-                .container {{
-                    background: white;
-                    border-radius: 8px;
-                    padding: 30px;
-                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-                }}
-                h1 {{
-                    color: #d32f2f;
-                    margin-top: 0;
-                }}
-                .error {{
-                    background-color: #ffebee;
-                    border-left: 4px solid #d32f2f;
-                    padding: 15px;
-                    margin: 20px 0;
-                }}
-            </style>
-        </head>
-        <body>
-            <div class="container">
-                <h1>Error Retrieving User Info</h1>
-                <div class="error">
-                    <strong>Error:</strong> {user_context["error"]}
-                </div>
-                <p><a href="{login_url}">Login again</a></p>
-            </div>
-        </body>
-        </html>
-        """
-        return HTMLResponse(content=error_html)
+        template = _jinja_env.get_template("error.html")
+        return HTMLResponse(
+            content=template.render(
+                error_title="Error Retrieving User Info",
+                error_message=user_context["error"],
+                login_url=login_url,
+            )
+        )

    # Build HTML response
    auth_mode = user_context.get("auth_mode", "unknown")
@@ -273,6 +469,16 @@ async def user_info_html(request: Request) -> HTMLResponse:
            str(request.url_for("oauth_logout")) if oauth_ctx else "/oauth/logout"
        )

+    # Get Nextcloud host for generating links to apps (used by viz tab)
+    # Use public issuer URL if available (for browser-accessible links),
+    # otherwise fall back to NEXTCLOUD_HOST from settings
+    from nextcloud_mcp_server.config import get_settings
+
+    settings = get_settings()
+    nextcloud_host_for_links = (
+        os.getenv("NEXTCLOUD_PUBLIC_ISSUER_URL") or settings.nextcloud_host
+    )
+
    # Build host info HTML (BasicAuth only)
    host_info_html = ""
    if auth_mode == "basic":
@@ -291,6 +497,47 @@ async def user_info_html(request: Request) -> HTMLResponse:
    session_info_html = ""
    if auth_mode == "oauth" and "session_id" in user_context:
        session_id = user_context.get("session_id", "unknown")
+        background_access_granted = user_context.get("background_access_granted", False)
+        background_details = user_context.get("background_access_details")
+
+        # Build background access section
+        background_html = ""
+        if background_access_granted and background_details:
+            flow_type = background_details.get("flow_type", "unknown")
+            provisioned_at = background_details.get("provisioned_at", "unknown")
+            scopes = background_details.get("scopes", "N/A")
+            token_audience = background_details.get("token_audience", "unknown")
+
+            background_html = f"""
+            <tr>
+                <td><strong>Background Access</strong></td>
+                <td><span style="color: #4caf50; font-weight: bold;">✓ Granted</span></td>
+            </tr>
+            <tr>
+                <td><strong>Flow Type</strong></td>
+                <td>{flow_type}</td>
+            </tr>
+            <tr>
+                <td><strong>Provisioned At</strong></td>
+                <td>{provisioned_at}</td>
+            </tr>
+            <tr>
+                <td><strong>Token Audience</strong></td>
+                <td>{token_audience}</td>
+            </tr>
+            <tr>
+                <td><strong>Scopes</strong></td>
+                <td><code style="font-size: 11px;">{scopes}</code></td>
+            </tr>
+            """
+        else:
+            background_html = """
+            <tr>
+                <td><strong>Background Access</strong></td>
+                <td><span style="color: #999;">Not Granted</span></td>
+            </tr>
+            """
+
        session_info_html = f"""
        <h2>Session Information</h2>
        <table>
@@ -298,9 +545,34 @@ async def user_info_html(request: Request) -> HTMLResponse:
                <td><strong>Session ID</strong></td>
                <td><code>{session_id}</code></td>
            </tr>
+            {background_html}
        </table>
        """

+        # Add revoke button if background access is granted
+        if background_access_granted:
+            revoke_url = str(request.url_for("revoke_session_endpoint"))
+            session_info_html += f"""
+            <div style="margin-top: 15px;">
+                <form method="post" action="{revoke_url}" onsubmit="return confirm('Are you sure you want to revoke background access? This will delete the refresh token.');">
+                    <button type="submit" style="padding: 8px 16px; background-color: #ff9800; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 14px;">
+                        Revoke Background Access
+                    </button>
+                </form>
+            </div>
+            """
+
+    # Build vector sync status HTML (with htmx auto-refresh)
+    vector_status_html = ""
+    if processing_status:
+        # Use htmx to load and auto-refresh the status fragment
+        # Container div stays stable, only inner content updates every 10s
+        vector_status_html = """
+            <div id="vector-sync-status" hx-get="/app/vector-sync/status" hx-trigger="load, every 10s" hx-swap="innerHTML">
+                <p style="color: #999;">Loading vector sync status...</p>
+            </div>
+        """
+
    # Build IdP profile HTML
    idp_profile_html = ""
    if "idp_profile" in user_context:
@@ -325,124 +597,133 @@ async def user_info_html(request: Request) -> HTMLResponse:
        <div class="warning">{user_context["idp_profile_error"]}</div>
        """

-    html_content = f"""
-    <!DOCTYPE html>
-    <html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <title>User Info - Nextcloud MCP Server</title>
-        <style>
-            body {{
-                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
-                max-width: 800px;
-                margin: 50px auto;
-                padding: 20px;
-                background-color: #f5f5f5;
-            }}
-            .container {{
-                background: white;
-                border-radius: 8px;
-                padding: 30px;
-                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-            }}
-            h1 {{
-                color: #0082c9;
-                margin-top: 0;
-                border-bottom: 2px solid #0082c9;
-                padding-bottom: 10px;
-            }}
-            h2 {{
-                color: #333;
-                margin-top: 30px;
-                border-bottom: 1px solid #e0e0e0;
-                padding-bottom: 5px;
-            }}
-            table {{
-                width: 100%;
-                border-collapse: collapse;
-                margin: 15px 0;
-            }}
-            td {{
-                padding: 10px;
-                border-bottom: 1px solid #e0e0e0;
-            }}
-            td:first-child {{
-                width: 200px;
-                color: #666;
-            }}
-            code {{
-                background-color: #f5f5f5;
-                padding: 2px 6px;
-                border-radius: 3px;
-                font-family: 'Courier New', monospace;
-            }}
-            .badge {{
-                display: inline-block;
-                padding: 3px 8px;
-                border-radius: 12px;
-                font-size: 12px;
-                font-weight: bold;
-                text-transform: uppercase;
-            }}
-            .badge-oauth {{
-                background-color: #4caf50;
-                color: white;
-            }}
-            .badge-basic {{
-                background-color: #2196f3;
-                color: white;
-            }}
-            .warning {{
-                background-color: #fff3cd;
-                border-left: 4px solid #ffc107;
-                padding: 15px;
-                margin: 15px 0;
-                color: #856404;
-            }}
-            .logout {{
-                margin-top: 30px;
-                padding-top: 20px;
-                border-top: 1px solid #e0e0e0;
-            }}
-            .button {{
-                display: inline-block;
-                padding: 10px 20px;
-                background-color: #d32f2f;
-                color: white;
-                text-decoration: none;
-                border-radius: 4px;
-                transition: background-color 0.3s;
-            }}
-            .button:hover {{
-                background-color: #b71c1c;
-            }}
-        </style>
-    </head>
-    <body>
-        <div class="container">
-            <h1>Nextcloud MCP Server - User Info</h1>
+    # Build user info tab content
+    user_info_tab_html = f"""
+        <h2>Authentication</h2>
+        <table>
+            <tr>
+                <td><strong>Username</strong></td>
+                <td>{username}</td>
+            </tr>
+            <tr>
+                <td><strong>Authentication Mode</strong></td>
+                <td><span class="badge badge-{auth_mode}">{auth_mode}</span></td>
+            </tr>
+        </table>

-            <h2>Authentication</h2>
-            <table>
-                <tr>
-                    <td><strong>Username</strong></td>
-                    <td>{username}</td>
-                </tr>
-                <tr>
-                    <td><strong>Authentication Mode</strong></td>
-                    <td><span class="badge badge-{auth_mode}">{auth_mode}</span></td>
-                </tr>
-            </table>
-
-            {host_info_html}
-            {session_info_html}
-            {idp_profile_html}
-
-            {f'<div class="logout"><a href="{logout_url}" class="button">Logout</a></div>' if auth_mode == "oauth" else ""}
-        </div>
-    </body>
-    </html>
+        {host_info_html}
+        {session_info_html}
+        {idp_profile_html}
    """

-    return HTMLResponse(content=html_content)
+    # Determine which tabs to show
+    show_vector_sync_tab = processing_status is not None
+    show_webhooks_tab = is_admin
+
+    # Build vector sync tab content (only if enabled)
+    vector_sync_tab_html = ""
+    if show_vector_sync_tab:
+        vector_sync_tab_html = vector_status_html
+
+    # Build webhooks tab content (only if admin)
+    webhooks_tab_html = ""
+    if show_webhooks_tab:
+        webhooks_tab_html = """
+            <div hx-get="/app/webhooks" hx-trigger="load" hx-swap="outerHTML">
+                <p style="color: #999;">Loading webhook management...</p>
+            </div>
+        """
+
+    # Check if vector sync is enabled (needed for Welcome tab)
+    vector_sync_enabled = os.getenv("VECTOR_SYNC_ENABLED", "false").lower() == "true"
+
+    # Render template
+    template = _jinja_env.get_template("user_info.html")
+    return HTMLResponse(
+        content=template.render(
+            user_info_tab_html=user_info_tab_html,
+            vector_sync_tab_html=vector_sync_tab_html,
+            webhooks_tab_html=webhooks_tab_html,
+            show_vector_sync_tab=show_vector_sync_tab,
+            show_webhooks_tab=show_webhooks_tab,
+            logout_url=logout_url if auth_mode == "oauth" else None,
+            nextcloud_host_for_links=nextcloud_host_for_links,
+            # Additional context for Welcome tab
+            vector_sync_enabled=vector_sync_enabled,
+            username=username,
+            auth_mode=auth_mode,
+        )
+    )
+
+
+@requires("authenticated", redirect="oauth_login")
+async def revoke_session(request: Request) -> HTMLResponse:
+    """Revoke background access (delete refresh token).
+
+    This endpoint allows users to revoke the refresh token that grants
+    background access to Nextcloud resources. The session cookie remains
+    valid for browser UI access, but background jobs will no longer work.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML response confirming revocation or showing error
+    """
+    oauth_ctx = getattr(request.app.state, "oauth_context", None)
+
+    if not oauth_ctx:
+        template = _jinja_env.get_template("error.html")
+        return HTMLResponse(
+            content=template.render(
+                error_title="Error",
+                error_message="OAuth mode not enabled",
+            ),
+            status_code=400,
+        )
+
+    storage = oauth_ctx.get("storage")
+    session_id = request.cookies.get("mcp_session")
+
+    if not storage or not session_id:
+        template = _jinja_env.get_template("error.html")
+        return HTMLResponse(
+            content=template.render(
+                error_title="Error",
+                error_message="Session not found",
+            ),
+            status_code=400,
+        )
+
+    try:
+        # Delete the refresh token
+        logger.info(f"Revoking background access for session {session_id[:16]}...")
+        await storage.delete_refresh_token(session_id)
+        logger.info(f"✓ Background access revoked for session {session_id[:16]}...")
+
+        # Redirect back to user page
+        user_page_url = str(request.url_for("user_info_html"))
+
+        template = _jinja_env.get_template("success.html")
+        return HTMLResponse(
+            content=template.render(
+                success_title="✓ Background Access Revoked",
+                success_messages=[
+                    "Your refresh token has been deleted successfully.",
+                    "Browser session remains active.",
+                ],
+                redirect_url=user_page_url,
+                redirect_delay=2,
+            )
+        )
+
+    except Exception as e:
+        logger.error(f"Failed to revoke background access: {e}")
+        template = _jinja_env.get_template("error.html")
+        return HTMLResponse(
+            content=template.render(
+                error_title="Error",
+                error_message=f"Failed to revoke background access: {e}",
+            ),
+            status_code=500,
+        )
@@ -0,0 +1,632 @@
+"""Vector visualization routes for testing search algorithms.
+
+Provides a web UI for users to test different search algorithms on their own
+indexed documents and visualize results in 3D space using PCA.
+
+All processing happens server-side following ADR-012:
+- Search execution via shared search/algorithms.py
+- Query embedding generation
+- PCA dimensionality reduction (768-dim → 3D)
+- Only 3D coordinates + metadata sent to client
+- Bandwidth-efficient (3 floats per doc vs 768)
+"""
+
+import logging
+import time
+from pathlib import Path
+
+import numpy as np
+from jinja2 import Environment, FileSystemLoader
+from starlette.authentication import requires
+from starlette.requests import Request
+from starlette.responses import HTMLResponse, JSONResponse
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.search import (
+    BM25HybridSearchAlgorithm,
+    SemanticSearchAlgorithm,
+)
+from nextcloud_mcp_server.vector.pca import PCA
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+# Setup Jinja2 environment for templates
+_template_dir = Path(__file__).parent / "templates"
+_jinja_env = Environment(loader=FileSystemLoader(_template_dir))
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_html(request: Request) -> HTMLResponse:
+    """Vector visualization page with search controls and interactive plot.
+
+    Provides UI for testing search algorithms with real-time visualization.
+    Requires vector sync to be enabled.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML page with search interface
+    """
+    settings = get_settings()
+
+    if not settings.vector_sync_enabled:
+        return HTMLResponse(
+            """
+            <div>
+                <h2>Vector Visualization</h2>
+                <div style="padding: 20px; background: #fff3cd; border: 1px solid #ffc107; border-radius: 4px;">
+                    Vector sync is not enabled. Set VECTOR_SYNC_ENABLED=true to use this feature.
+                </div>
+            </div>
+            """
+        )
+
+    # Get user info from auth context
+    username = (
+        request.user.display_name
+        if hasattr(request.user, "display_name")
+        else "unknown"
+    )
+
+    # Load and render template
+    template = _jinja_env.get_template("vector_viz.html")
+    html_content = template.render(username=username)
+    return HTMLResponse(content=html_content)
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_search(request: Request) -> JSONResponse:
+    """Execute server-side search and return 3D coordinates + results.
+
+    All processing happens server-side:
+    1. Execute search via shared algorithm module
+    2. Generate query embedding
+    3. Fetch matching vectors from Qdrant
+    4. Apply PCA reduction (768-dim → 3D) to query + documents
+    5. Return coordinates + metadata only
+
+    Args:
+        request: Starlette request with query parameters
+
+    Returns:
+        JSON response with coordinates_3d and results (including query point)
+    """
+    settings = get_settings()
+
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"success": False, "error": "Vector sync not enabled"},
+            status_code=400,
+        )
+
+    # Get user info from auth context
+    username = (
+        request.user.display_name if hasattr(request.user, "display_name") else None
+    )
+
+    if not username:
+        return JSONResponse(
+            {"success": False, "error": "User not authenticated"},
+            status_code=401,
+        )
+
+    # Parse query parameters
+    query = request.query_params.get("query", "")
+    algorithm = request.query_params.get("algorithm", "bm25_hybrid")
+    limit = int(request.query_params.get("limit", "50"))
+    score_threshold = float(request.query_params.get("score_threshold", "0.0"))
+    fusion = request.query_params.get("fusion", "rrf")  # Default to RRF
+
+    # Parse doc_types (comma-separated list, None = all types)
+    doc_types_param = request.query_params.get("doc_types", "")
+    doc_types = doc_types_param.split(",") if doc_types_param else None
+
+    logger.info(
+        f"Viz search: user={username}, query='{query}', "
+        f"algorithm={algorithm}, fusion={fusion}, limit={limit}, doc_types={doc_types}"
+    )
+
+    try:
+        # Start total request timer
+        request_start = time.perf_counter()
+        # Get authenticated HTTP client from session
+        # In BasicAuth mode: uses username/password from session
+        # In OAuth mode: uses access token from session
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            _get_authenticated_client_for_userinfo,
+        )
+
+        async with await _get_authenticated_client_for_userinfo(request) as nc_client:  # noqa: F841
+            # Create search algorithm (no client needed - verification removed)
+            if algorithm == "semantic":
+                search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+            elif algorithm == "bm25_hybrid":
+                search_algo = BM25HybridSearchAlgorithm(
+                    score_threshold=score_threshold, fusion=fusion
+                )
+            else:
+                return JSONResponse(
+                    {"success": False, "error": f"Unknown algorithm: {algorithm}"},
+                    status_code=400,
+                )
+
+            # Execute search (supports cross-app when doc_types=None)
+            # Get unverified results with buffer for filtering
+            search_start = time.perf_counter()
+            all_results = []
+            if doc_types is None or len(doc_types) == 0:
+                # Cross-app search - search all indexed types
+                unverified_results = await search_algo.search(
+                    query=query,
+                    user_id=username,
+                    limit=limit * 2,  # Buffer for verification filtering
+                    doc_type=None,  # Search all types
+                    score_threshold=score_threshold,
+                )
+                all_results.extend(unverified_results)
+            else:
+                # Search each document type and combine
+                for doc_type in doc_types:
+                    unverified_results = await search_algo.search(
+                        query=query,
+                        user_id=username,
+                        limit=limit * 2,  # Buffer for verification filtering
+                        doc_type=doc_type,
+                        score_threshold=score_threshold,
+                    )
+                    all_results.extend(unverified_results)
+                # Sort by score before verification
+                all_results.sort(key=lambda r: r.score, reverse=True)
+
+            # No verification needed for visualization - we only need Qdrant metadata
+            # (title, excerpt, doc_type) which is already in search results.
+            # Verification is only needed for sampling (LLM needs full content).
+            search_results = all_results[:limit]
+            search_duration = time.perf_counter() - search_start
+
+        # Store original scores and normalize for visualization
+        # (best result = 1.0, worst result = 0.0 within THIS result set)
+        # This makes visual encoding meaningful regardless of RRF normalization
+        if search_results:
+            scores = [r.score for r in search_results]
+            min_score, max_score = min(scores), max(scores)
+            score_range = max_score - min_score if max_score > min_score else 1.0
+
+            logger.info(
+                f"Normalizing scores for viz: original range [{min_score:.3f}, {max_score:.3f}] "
+                f"→ [0.0, 1.0]"
+            )
+
+            # Store original score and rescale to 0-1 for visualization
+            for r in search_results:
+                # Store original score before normalization
+                r.original_score = r.score
+                # Rescale for visual encoding
+                r.score = (r.score - min_score) / score_range
+
+        if not search_results:
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [],
+                    "coordinates_3d": [],
+                    "query_coords": [],
+                    "message": "No results found",
+                }
+            )
+
+        # Fetch vectors for specific matching chunks from Qdrant using batch retrieve
+        vector_fetch_start = time.perf_counter()
+        qdrant_client = await get_qdrant_client()
+
+        chunk_vectors_map = {}  # Map (doc_id, chunk_start, chunk_end) -> vector
+
+        # Collect point IDs from search results for batch retrieval
+        # point_id is the Qdrant internal ID returned by search algorithms
+        point_ids = [r.point_id for r in search_results if r.point_id]
+
+        if point_ids:
+            # Single batch retrieve call instead of N sequential scroll calls
+            # This is ~50x faster for 50 results (1 HTTP request vs 50)
+            points_response = await qdrant_client.retrieve(
+                collection_name=settings.get_collection_name(),
+                ids=point_ids,
+                with_vectors=["dense"],
+                with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
+            )
+
+            # Build chunk_vectors_map from batch response
+            for point in points_response:
+                if point.vector is not None:
+                    # Extract dense vector (handle both named and unnamed vectors)
+                    if isinstance(point.vector, dict):
+                        vector = point.vector.get("dense")
+                    else:
+                        vector = point.vector
+
+                    if vector is not None and point.payload:
+                        doc_id = point.payload.get("doc_id")
+                        chunk_start = point.payload.get("chunk_start_offset")
+                        chunk_end = point.payload.get("chunk_end_offset")
+                        chunk_key = (doc_id, chunk_start, chunk_end)
+                        chunk_vectors_map[chunk_key] = vector
+
+        vector_fetch_duration = time.perf_counter() - vector_fetch_start
+
+        if len(chunk_vectors_map) < 2:
+            # Not enough chunks for PCA
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [
+                        {
+                            "id": r.id,
+                            "doc_type": r.doc_type,
+                            "title": r.title,
+                            "excerpt": r.excerpt,
+                            "score": r.score,
+                        }
+                        for r in search_results
+                    ],
+                    "coordinates_3d": [[0, 0, 0]] * len(search_results),
+                    "query_coords": [0, 0, 0],
+                    "message": "Not enough chunks for PCA",
+                }
+            )
+
+        # Detect embedding dimension from first available vector
+        embedding_dim = None
+        for vector in chunk_vectors_map.values():
+            if vector is not None:
+                embedding_dim = len(vector)
+                break
+
+        if embedding_dim is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "Could not determine embedding dimension",
+                },
+                status_code=500,
+            )
+
+        logger.info(f"Detected embedding dimension: {embedding_dim}")
+
+        # Build chunk vectors array in search_results order (1:1 mapping)
+        chunk_vectors = []
+        for result in search_results:
+            chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset)
+            if chunk_key in chunk_vectors_map:
+                chunk_vectors.append(chunk_vectors_map[chunk_key])
+            else:
+                # Chunk not found in vectors (shouldn't happen)
+                logger.warning(
+                    f"Chunk {chunk_key} not found in fetched vectors, using zero vector"
+                )
+                # Use zero vector as fallback
+                chunk_vectors.append(np.zeros(embedding_dim))
+
+        chunk_vectors = np.array(chunk_vectors)
+
+        # Reuse query embedding from search algorithm (avoids redundant embedding call)
+        query_embed_start = time.perf_counter()
+        if search_algo.query_embedding is not None:
+            query_embedding = search_algo.query_embedding
+            logger.info(
+                f"Reusing query embedding from search algorithm "
+                f"(dimension={len(query_embedding)})"
+            )
+        else:
+            # Fallback: generate embedding if not available from search
+            from nextcloud_mcp_server.embedding.service import get_embedding_service
+
+            embedding_service = get_embedding_service()
+            query_embedding = await embedding_service.embed(query)
+            logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
+        query_embed_duration = time.perf_counter() - query_embed_start
+
+        # Combine query vector with chunk vectors for PCA
+        # Query will be the last point in the array
+        all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
+
+        # Normalize vectors to unit length (L2 normalization)
+        # This is critical because Qdrant uses COSINE distance, which only measures
+        # vector direction (angle), not magnitude. PCA uses Euclidean distance which
+        # considers both direction and magnitude. By normalizing to unit length,
+        # Euclidean distances in PCA space will match cosine distances.
+        norms = np.linalg.norm(all_vectors, axis=1, keepdims=True)
+
+        # Check for zero-norm vectors (can happen with empty/corrupted embeddings)
+        zero_norm_mask = norms[:, 0] < 1e-10
+        if zero_norm_mask.any():
+            zero_indices = np.where(zero_norm_mask)[0]
+            logger.warning(
+                f"Found {zero_norm_mask.sum()} zero-norm vectors at indices {zero_indices.tolist()}. "
+                "Replacing with small epsilon to avoid division by zero."
+            )
+            # Replace zero norms with small epsilon to avoid NaN
+            norms[zero_norm_mask] = 1e-10
+
+        all_vectors_normalized = all_vectors / norms
+        logger.info(
+            f"Normalized vectors: query_norm={norms[-1][0]:.3f}, "
+            f"doc_norm_range=[{norms[:-1].min():.3f}, {norms[:-1].max():.3f}]"
+        )
+
+        # Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
+        # Run in thread pool to avoid blocking the event loop (CPU-bound)
+        pca_start = time.perf_counter()
+
+        def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
+            pca = PCA(n_components=3)
+            coords = pca.fit_transform(vectors)
+            return coords, pca
+
+        import anyio
+
+        coords_3d, pca = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+            lambda: _compute_pca(all_vectors_normalized)
+        )
+        pca_duration = time.perf_counter() - pca_start
+
+        # After fit, these attributes are guaranteed to be set
+        assert pca.explained_variance_ratio_ is not None
+
+        # Check for NaN values in PCA output (numerical instability)
+        nan_mask = np.isnan(coords_3d)
+        if nan_mask.any():
+            nan_rows = np.where(nan_mask.any(axis=1))[0]
+            logger.error(
+                f"Found NaN values in PCA output at {len(nan_rows)} points: {nan_rows.tolist()[:10]}. "
+                "Replacing NaN with 0.0 to prevent JSON serialization error."
+            )
+            # Replace NaN with 0 to allow JSON serialization
+            coords_3d = np.nan_to_num(coords_3d, nan=0.0)
+
+        # Split query coords from chunk coords
+        # Round to 2 decimal places for cleaner display
+        query_coords_3d = [
+            round(float(x), 2) for x in coords_3d[-1]
+        ]  # Last point is query
+        chunk_coords_3d = coords_3d[:-1]  # All but last are chunks
+
+        logger.info(
+            f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, "
+            f"PC2={pca.explained_variance_ratio_[1]:.3f}, "
+            f"PC3={pca.explained_variance_ratio_[2]:.3f}"
+        )
+        logger.info(
+            f"Embedding stats: chunks={len(chunk_vectors)}, "
+            f"query_dim={len(query_embedding)}, chunk_vector_dim={chunk_vectors.shape[1] if chunk_vectors.size > 0 else 0}"
+        )
+
+        # Coordinates already match search_results order (1:1 mapping)
+        result_coords = [
+            [round(float(x), 2) for x in coord] for coord in chunk_coords_3d
+        ]
+
+        # Build response
+        response_results = [
+            {
+                "id": r.id,
+                "doc_type": r.doc_type,
+                "title": r.title,
+                "excerpt": r.excerpt,
+                "score": r.score,  # Normalized score for visual encoding (0-1)
+                "original_score": getattr(
+                    r, "original_score", r.score
+                ),  # Raw score from algorithm
+                "chunk_start_offset": r.chunk_start_offset,
+                "chunk_end_offset": r.chunk_end_offset,
+            }
+            for r in search_results
+        ]
+
+        # Calculate total request duration
+        total_duration = time.perf_counter() - request_start
+
+        # Log comprehensive timing metrics
+        logger.info(
+            f"Viz search timing: total={total_duration * 1000:.1f}ms, "
+            f"search={search_duration * 1000:.1f}ms ({search_duration / total_duration * 100:.1f}%), "
+            f"vector_fetch={vector_fetch_duration * 1000:.1f}ms ({vector_fetch_duration / total_duration * 100:.1f}%), "
+            f"query_embed={query_embed_duration * 1000:.1f}ms ({query_embed_duration / total_duration * 100:.1f}%), "
+            f"pca={pca_duration * 1000:.1f}ms ({pca_duration / total_duration * 100:.1f}%), "
+            f"results={len(search_results)}, chunk_vectors={len(chunk_vectors)}"
+        )
+
+        return JSONResponse(
+            {
+                "success": True,
+                "results": response_results,
+                "coordinates_3d": result_coords[: len(search_results)],
+                "query_coords": query_coords_3d,
+                "pca_variance": {
+                    "pc1": float(pca.explained_variance_ratio_[0]),
+                    "pc2": float(pca.explained_variance_ratio_[1]),
+                    "pc3": float(pca.explained_variance_ratio_[2]),
+                },
+                "timing": {
+                    "total_ms": round(total_duration * 1000, 2),
+                    "search_ms": round(search_duration * 1000, 2),
+                    "vector_fetch_ms": round(vector_fetch_duration * 1000, 2),
+                    "query_embed_ms": round(query_embed_duration * 1000, 2),
+                    "pca_ms": round(pca_duration * 1000, 2),
+                    "num_results": len(search_results),
+                    "num_chunk_vectors": len(chunk_vectors),
+                },
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Viz search error: {e}", exc_info=True)
+        return JSONResponse(
+            {"success": False, "error": str(e)},
+            status_code=500,
+        )
+
+
+@requires("authenticated", redirect="oauth_login")
+async def chunk_context_endpoint(request: Request) -> JSONResponse:
+    """Fetch chunk text with surrounding context for visualization.
+
+    This endpoint retrieves the matched chunk along with surrounding text
+    to provide context for the search result. Used by the viz pane to
+    display chunks inline.
+
+    Query parameters:
+        doc_type: Document type (e.g., "note")
+        doc_id: Document ID
+        start: Chunk start offset (character position)
+        end: Chunk end offset (character position)
+        context: Characters of context before/after (default: 500)
+
+    Returns:
+        JSON with chunk_text, before_context, after_context, and flags
+    """
+    try:
+        # Get query parameters
+        doc_type = request.query_params.get("doc_type")
+        doc_id = request.query_params.get("doc_id")
+        start_str = request.query_params.get("start")
+        end_str = request.query_params.get("end")
+        context_chars = int(request.query_params.get("context", "500"))
+
+        # Validate required parameters
+        if not all([doc_type, doc_id, start_str, end_str]):
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "Missing required parameters: doc_type, doc_id, start, end",
+                },
+                status_code=400,
+            )
+
+        # Type assertions - we validated these above
+        assert doc_type is not None
+        assert doc_id is not None
+        assert start_str is not None
+        assert end_str is not None
+
+        start = int(start_str)
+        end = int(end_str)
+        # Convert doc_id to int (all document types use int IDs)
+        doc_id_int = int(doc_id)
+
+        # Get authenticated Nextcloud client
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            _get_authenticated_client_for_userinfo,
+        )
+        from nextcloud_mcp_server.search.context import get_chunk_with_context
+
+        # Use context expansion module to fetch chunk with surrounding context
+        async with await _get_authenticated_client_for_userinfo(request) as nc_client:
+            chunk_context = await get_chunk_with_context(
+                nc_client=nc_client,
+                user_id=request.user.display_name,  # User ID from auth
+                doc_id=doc_id_int,
+                doc_type=doc_type,
+                chunk_start=start,
+                chunk_end=end,
+                context_chars=context_chars,
+            )
+
+        # Check if context expansion succeeded
+        if chunk_context is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": f"Failed to fetch chunk context for {doc_type} {doc_id}",
+                },
+                status_code=404,
+            )
+
+        logger.info(
+            f"Fetched chunk context for {doc_type}_{doc_id}: "
+            f"chunk_len={len(chunk_context.chunk_text)}, "
+            f"before_len={len(chunk_context.before_context)}, "
+            f"after_len={len(chunk_context.after_context)}"
+        )
+
+        # For PDF files, also fetch the highlighted page image from Qdrant
+        highlighted_page_image = None
+        page_number = None
+        if doc_type == "file":
+            try:
+                from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+                settings = get_settings()
+                qdrant_client = await get_qdrant_client()
+                username = request.user.display_name
+
+                # Query for this specific chunk's highlighted image
+                points_response = await qdrant_client.scroll(
+                    collection_name=settings.get_collection_name(),
+                    scroll_filter=Filter(
+                        must=[
+                            get_placeholder_filter(),
+                            FieldCondition(
+                                key="doc_id", match=MatchValue(value=doc_id_int)
+                            ),
+                            FieldCondition(
+                                key="user_id", match=MatchValue(value=username)
+                            ),
+                            FieldCondition(
+                                key="chunk_start_offset", match=MatchValue(value=start)
+                            ),
+                            FieldCondition(
+                                key="chunk_end_offset", match=MatchValue(value=end)
+                            ),
+                        ]
+                    ),
+                    limit=1,
+                    with_vectors=False,
+                    with_payload=["highlighted_page_image", "page_number"],
+                )
+
+                points = points_response[0]
+                if points and points[0].payload:
+                    highlighted_page_image = points[0].payload.get(
+                        "highlighted_page_image"
+                    )
+                    page_number = points[0].payload.get("page_number")
+                    if highlighted_page_image:
+                        logger.info(
+                            f"Found highlighted image for chunk: "
+                            f"page={page_number}, image_size={len(highlighted_page_image)}"
+                        )
+            except Exception as e:
+                logger.warning(f"Failed to fetch highlighted image: {e}")
+
+        # Return response compatible with frontend expectations
+        response_data: dict = {
+            "success": True,
+            "chunk_text": chunk_context.chunk_text,
+            "before_context": chunk_context.before_context,
+            "after_context": chunk_context.after_context,
+            "has_more_before": chunk_context.has_before_truncation,
+            "has_more_after": chunk_context.has_after_truncation,
+        }
+
+        # Add image data if available
+        if highlighted_page_image:
+            response_data["highlighted_page_image"] = highlighted_page_image
+            response_data["page_number"] = page_number
+
+        return JSONResponse(response_data)
+
+    except ValueError as e:
+        logger.error(f"Invalid parameter format: {e}")
+        return JSONResponse(
+            {"success": False, "error": f"Invalid parameter format: {e}"},
+            status_code=400,
+        )
+    except Exception as e:
+        logger.error(f"Chunk context error: {e}", exc_info=True)
+        return JSONResponse(
+            {"success": False, "error": str(e)},
+            status_code=500,
+        )
@@ -0,0 +1,540 @@
+"""Webhook management routes for admin UI.
+
+Provides browser-based endpoints for admin users to manage webhook configurations
+using preset templates. Only accessible to Nextcloud administrators.
+"""
+
+import logging
+import os
+
+import httpx
+from starlette.authentication import requires
+from starlette.requests import Request
+from starlette.responses import HTMLResponse
+
+from nextcloud_mcp_server.auth.permissions import is_nextcloud_admin
+from nextcloud_mcp_server.client.webhooks import WebhooksClient
+from nextcloud_mcp_server.server.webhook_presets import (
+    WEBHOOK_PRESETS,
+    filter_presets_by_installed_apps,
+    get_preset,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _get_storage(request: Request):
+    """Get storage instance from app state.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        RefreshTokenStorage instance or None
+    """
+    # Try browser_app state first (for /app routes)
+    storage = getattr(request.app.state, "storage", None)
+
+    # Try oauth_context if in OAuth mode
+    if not storage:
+        oauth_ctx = getattr(request.app.state, "oauth_context", None)
+        if oauth_ctx:
+            storage = oauth_ctx.get("storage")
+
+    return storage
+
+
+async def _get_installed_apps(http_client: httpx.AsyncClient) -> list[str]:
+    """Get list of installed and enabled apps from Nextcloud capabilities.
+
+    Args:
+        http_client: Authenticated HTTP client
+
+    Returns:
+        List of installed app names (e.g., ["notes", "calendar", "forms"])
+    """
+    try:
+        response = await http_client.get(
+            "/ocs/v2.php/cloud/capabilities",
+            headers={"OCS-APIRequest": "true", "Accept": "application/json"},
+        )
+        response.raise_for_status()
+        data = response.json()
+
+        # Extract app names from capabilities
+        capabilities = data.get("ocs", {}).get("data", {}).get("capabilities", {})
+        # Filter out core NC capabilities (not apps)
+        core_keys = {"version", "core"}
+        app_keys = set(capabilities.keys()) - core_keys
+        return sorted(app_keys)
+    except Exception as e:
+        logger.warning(f"Failed to get installed apps from capabilities: {e}")
+        return []
+
+
+def _get_webhook_uri() -> str:
+    """Get the webhook endpoint URI for this MCP server.
+
+    This function determines the correct webhook URL based on the environment:
+    1. Uses WEBHOOK_INTERNAL_URL if explicitly set (highest priority)
+    2. Detects Docker environment and uses internal service name
+    3. Falls back to NEXTCLOUD_MCP_SERVER_URL
+
+    In Docker environments, Nextcloud needs to reach the MCP service using
+    the internal Docker network hostname (e.g., http://mcp:8000), not localhost.
+
+    Returns:
+        Full webhook endpoint URL accessible from Nextcloud
+    """
+    # Explicit override (highest priority)
+    webhook_url = os.getenv("WEBHOOK_INTERNAL_URL")
+    if webhook_url:
+        return f"{webhook_url}/webhooks/nextcloud"
+
+    # Detect Docker environment
+    # Check for common Docker indicators
+    is_docker = (
+        os.path.exists("/.dockerenv")  # Docker container marker file
+        or os.path.exists("/run/.containerenv")  # Podman marker
+        or os.getenv("DOCKER_CONTAINER") == "true"  # Explicit flag
+    )
+
+    if is_docker:
+        # In Docker, use internal service name from NEXTCLOUD_MCP_SERVICE_NAME
+        # or default to 'mcp' (docker-compose service name)
+        service_name = os.getenv("NEXTCLOUD_MCP_SERVICE_NAME", "mcp")
+        port = os.getenv("NEXTCLOUD_MCP_PORT", "8000")
+        logger.debug(
+            f"Docker environment detected, using internal URL: http://{service_name}:{port}"
+        )
+        return f"http://{service_name}:{port}/webhooks/nextcloud"
+
+    # Fallback to configured server URL (for non-Docker deployments)
+    server_url = os.getenv("NEXTCLOUD_MCP_SERVER_URL", "http://localhost:8000")
+    return f"{server_url}/webhooks/nextcloud"
+
+
+async def _get_authenticated_client(request: Request) -> httpx.AsyncClient:
+    """Get an authenticated HTTP client for Nextcloud API calls.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        Authenticated httpx.AsyncClient
+
+    Raises:
+        RuntimeError: If unable to create authenticated client
+    """
+    # Get OAuth context from app state
+    oauth_ctx = getattr(request.app.state, "oauth_context", None)
+
+    # BasicAuth mode - use credentials from environment
+    if not oauth_ctx:
+        nextcloud_host = os.getenv("NEXTCLOUD_HOST")
+        username = os.getenv("NEXTCLOUD_USERNAME")
+        password = os.getenv("NEXTCLOUD_PASSWORD")
+
+        if not all([nextcloud_host, username, password]):
+            raise RuntimeError("BasicAuth credentials not configured")
+
+        assert nextcloud_host is not None  # Type narrowing for type checker
+        return httpx.AsyncClient(
+            base_url=nextcloud_host,
+            auth=(username, password),
+            timeout=30.0,
+        )
+
+    # OAuth mode - get token from session
+    storage = oauth_ctx.get("storage")
+    session_id = request.cookies.get("mcp_session")
+
+    if not storage or not session_id:
+        raise RuntimeError("Session not found")
+
+    token_data = await storage.get_refresh_token(session_id)
+    if not token_data or "access_token" not in token_data:
+        raise RuntimeError("No access token found in session")
+
+    access_token = token_data["access_token"]
+    nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+    if not nextcloud_host:
+        raise RuntimeError("Nextcloud host not configured")
+
+    return httpx.AsyncClient(
+        base_url=nextcloud_host,
+        headers={"Authorization": f"Bearer {access_token}"},
+        timeout=30.0,
+    )
+
+
+async def _get_enabled_presets(
+    webhooks_client: WebhooksClient,
+    storage=None,
+) -> dict[str, list[int]]:
+    """Get currently enabled webhook presets.
+
+    Reads from database first for better performance. Falls back to API if needed.
+
+    Args:
+        webhooks_client: Webhooks API client
+        storage: Optional RefreshTokenStorage instance
+
+    Returns:
+        Dictionary mapping preset_id to list of webhook IDs
+    """
+    try:
+        # Try database first (faster, works offline)
+        if storage:
+            all_webhooks = await storage.list_all_webhooks()
+            enabled_presets: dict[str, list[int]] = {}
+
+            for webhook in all_webhooks:
+                preset_id = webhook["preset_id"]
+                webhook_id = webhook["webhook_id"]
+
+                if preset_id not in enabled_presets:
+                    enabled_presets[preset_id] = []
+                enabled_presets[preset_id].append(webhook_id)
+
+            return enabled_presets
+
+        # Fallback to API query
+        registered_webhooks = await webhooks_client.list_webhooks()
+        webhook_uri = _get_webhook_uri()
+
+        # Group webhooks by preset based on matching events
+        enabled_presets: dict[str, list[int]] = {}
+
+        for preset_id, preset in WEBHOOK_PRESETS.items():
+            preset_event_classes = {event["event"] for event in preset["events"]}
+            matching_webhooks = []
+
+            for webhook in registered_webhooks:
+                # Check if webhook matches this preset
+                if (
+                    webhook.get("uri") == webhook_uri
+                    and webhook.get("event") in preset_event_classes
+                ):
+                    matching_webhooks.append(webhook["id"])
+
+            if matching_webhooks:
+                enabled_presets[preset_id] = matching_webhooks
+
+        return enabled_presets
+
+    except Exception as e:
+        logger.error(f"Failed to list webhooks: {e}")
+        return {}
+
+
+@requires("authenticated", redirect="oauth_login")
+async def webhook_management_pane(request: Request) -> HTMLResponse:
+    """Webhook management pane - returns HTML for webhook configuration.
+
+    This endpoint checks if the user is an admin and returns either:
+    - Admin view: Webhook management interface with preset controls
+    - Non-admin view: Message indicating admin-only access
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML response with webhook management interface or access denied message
+    """
+    try:
+        # Get authenticated HTTP client
+        http_client = await _get_authenticated_client(request)
+        username = request.user.display_name
+
+        # Check admin permissions
+        is_admin = await is_nextcloud_admin(request, http_client)
+
+        if not is_admin:
+            return HTMLResponse(
+                content="""
+                <div class="info-message">
+                    <p><strong>Admin Access Required</strong></p>
+                    <p>Webhook management is only available to Nextcloud administrators.</p>
+                    <p>Your account does not have admin privileges.</p>
+                </div>
+                """
+            )
+
+        # Get webhooks client
+        webhooks_client = WebhooksClient(http_client, username)
+
+        # Get storage for database-backed webhook tracking
+        storage = _get_storage(request)
+
+        # Get installed apps to filter presets
+        installed_apps = await _get_installed_apps(http_client)
+        logger.debug(f"Installed apps: {installed_apps}")
+
+        # Get currently enabled presets (from database or API)
+        enabled_presets = await _get_enabled_presets(webhooks_client, storage)
+
+        # Filter presets based on installed apps
+        available_presets = filter_presets_by_installed_apps(installed_apps)
+
+        # Build preset cards HTML
+        preset_cards_html = ""
+        for preset_id, preset in available_presets:
+            is_enabled = preset_id in enabled_presets
+            num_webhooks = len(enabled_presets.get(preset_id, []))
+
+            # Status badge
+            if is_enabled:
+                status_badge = f'<span style="color: #4caf50; font-weight: bold;">✓ Enabled ({num_webhooks} webhooks)</span>'
+                action_button = f"""
+                <button
+                    hx-delete="/app/webhooks/disable/{preset_id}"
+                    hx-target="#preset-{preset_id}"
+                    hx-swap="outerHTML"
+                    class="button"
+                    style="background-color: #ff9800;">
+                    Disable
+                </button>
+                """
+            else:
+                status_badge = '<span style="color: #999;">Not Enabled</span>'
+                action_button = f"""
+                <button
+                    hx-post="/app/webhooks/enable/{preset_id}"
+                    hx-target="#preset-{preset_id}"
+                    hx-swap="outerHTML"
+                    class="button button-primary">
+                    Enable
+                </button>
+                """
+
+            preset_cards_html += f"""
+            <div id="preset-{preset_id}" style="border: 1px solid #e0e0e0; border-radius: 6px; padding: 20px; margin: 15px 0;">
+                <h3 style="margin-top: 0; color: #0082c9;">{preset["name"]}</h3>
+                <p style="color: #666; margin: 10px 0;">{preset["description"]}</p>
+                <p style="font-size: 13px; color: #999;">
+                    <strong>App:</strong> {preset["app"]} |
+                    <strong>Events:</strong> {len(preset["events"])}
+                </p>
+                <div style="margin-top: 15px; display: flex; align-items: center; gap: 15px;">
+                    <div>{status_badge}</div>
+                    <div>{action_button}</div>
+                </div>
+            </div>
+            """
+
+        # Get webhook endpoint URL for display
+        webhook_uri = _get_webhook_uri()
+
+        html_content = f"""
+        <h2>Webhook Management</h2>
+        <div class="info-message">
+            <p><strong>About Webhooks</strong></p>
+            <p>Webhooks enable real-time synchronization by notifying this server when content changes in Nextcloud.</p>
+            <p><strong>Endpoint:</strong> <code>{webhook_uri}</code></p>
+        </div>
+
+        <h3 style="margin-top: 30px;">Available Presets</h3>
+        <p style="color: #666;">Enable webhook presets with one click for common synchronization scenarios.</p>
+        <p style="color: #999; font-size: 13px; margin-top: 5px;">Showing {len(available_presets)} preset(s) for your installed apps ({len(installed_apps)} detected)</p>
+
+        {preset_cards_html}
+        """
+
+        return HTMLResponse(content=html_content)
+
+    except Exception as e:
+        logger.error(f"Error loading webhook management pane: {e}", exc_info=True)
+        return HTMLResponse(
+            content=f"""
+            <div class="warning">
+                <p><strong>Error Loading Webhooks</strong></p>
+                <p>{str(e)}</p>
+            </div>
+            """,
+            status_code=500,
+        )
+
+
+@requires("authenticated", redirect="oauth_login")
+async def enable_webhook_preset(request: Request) -> HTMLResponse:
+    """Enable a webhook preset by registering all webhooks.
+
+    Args:
+        request: Starlette request object (preset_id in path)
+
+    Returns:
+        HTML response with updated preset card
+    """
+    preset_id = request.path_params["preset_id"]
+
+    try:
+        # Get authenticated HTTP client
+        http_client = await _get_authenticated_client(request)
+        username = request.user.display_name
+
+        # Check admin permissions
+        is_admin = await is_nextcloud_admin(request, http_client)
+        if not is_admin:
+            return HTMLResponse(
+                content='<div class="warning">Admin access required</div>',
+                status_code=403,
+            )
+
+        # Get preset configuration
+        preset = get_preset(preset_id)
+        if not preset:
+            return HTMLResponse(
+                content=f'<div class="warning">Unknown preset: {preset_id}</div>',
+                status_code=404,
+            )
+
+        # Register webhooks
+        webhooks_client = WebhooksClient(http_client, username)
+        webhook_uri = _get_webhook_uri()
+        registered_ids = []
+
+        for event_config in preset["events"]:
+            webhook_data = await webhooks_client.create_webhook(
+                event=event_config["event"],
+                uri=webhook_uri,
+                event_filter=event_config["filter"] if event_config["filter"] else None,
+            )
+            webhook_id = webhook_data["id"]
+            registered_ids.append(webhook_id)
+            logger.info(f"Registered webhook {webhook_id} for {event_config['event']}")
+
+        # Persist webhook IDs to database
+        storage = _get_storage(request)
+        if storage:
+            for webhook_id in registered_ids:
+                await storage.store_webhook(webhook_id, preset_id)
+            logger.info(
+                f"Persisted {len(registered_ids)} webhook(s) for preset '{preset_id}' to database"
+            )
+
+        # Return updated card
+        num_webhooks = len(registered_ids)
+        return HTMLResponse(
+            content=f"""
+            <div id="preset-{preset_id}" style="border: 1px solid #e0e0e0; border-radius: 6px; padding: 20px; margin: 15px 0;">
+                <h3 style="margin-top: 0; color: #0082c9;">{preset["name"]}</h3>
+                <p style="color: #666; margin: 10px 0;">{preset["description"]}</p>
+                <p style="font-size: 13px; color: #999;">
+                    <strong>App:</strong> {preset["app"]} |
+                    <strong>Events:</strong> {len(preset["events"])}
+                </p>
+                <div style="margin-top: 15px; display: flex; align-items: center; gap: 15px;">
+                    <div><span style="color: #4caf50; font-weight: bold;">✓ Enabled ({num_webhooks} webhooks)</span></div>
+                    <div>
+                        <button
+                            hx-delete="/app/webhooks/disable/{preset_id}"
+                            hx-target="#preset-{preset_id}"
+                            hx-swap="outerHTML"
+                            class="button"
+                            style="background-color: #ff9800;">
+                            Disable
+                        </button>
+                    </div>
+                </div>
+            </div>
+            """
+        )
+
+    except Exception as e:
+        logger.error(f"Failed to enable preset {preset_id}: {e}", exc_info=True)
+        return HTMLResponse(
+            content=f'<div class="warning">Failed to enable preset: {str(e)}</div>',
+            status_code=500,
+        )
+
+
+@requires("authenticated", redirect="oauth_login")
+async def disable_webhook_preset(request: Request) -> HTMLResponse:
+    """Disable a webhook preset by deleting all registered webhooks.
+
+    Args:
+        request: Starlette request object (preset_id in path)
+
+    Returns:
+        HTML response with updated preset card
+    """
+    preset_id = request.path_params["preset_id"]
+
+    try:
+        # Get authenticated HTTP client
+        http_client = await _get_authenticated_client(request)
+        username = request.user.display_name
+
+        # Check admin permissions
+        is_admin = await is_nextcloud_admin(request, http_client)
+        if not is_admin:
+            return HTMLResponse(
+                content='<div class="warning">Admin access required</div>',
+                status_code=403,
+            )
+
+        # Get preset configuration
+        preset = get_preset(preset_id)
+        if not preset:
+            return HTMLResponse(
+                content=f'<div class="warning">Unknown preset: {preset_id}</div>',
+                status_code=404,
+            )
+
+        # Find and delete matching webhooks
+        webhooks_client = WebhooksClient(http_client, username)
+
+        # Get webhook IDs from database first (more reliable)
+        storage = _get_storage(request)
+        if storage:
+            webhook_ids = await storage.get_webhooks_by_preset(preset_id)
+        else:
+            # Fallback to API query if storage not available
+            enabled_presets = await _get_enabled_presets(webhooks_client)
+            webhook_ids = enabled_presets.get(preset_id, [])
+
+        for webhook_id in webhook_ids:
+            await webhooks_client.delete_webhook(webhook_id)
+            logger.info(f"Deleted webhook {webhook_id} from preset {preset_id}")
+
+        # Remove from database
+        if storage:
+            deleted_count = await storage.clear_preset_webhooks(preset_id)
+            logger.info(
+                f"Removed {deleted_count} webhook(s) for preset '{preset_id}' from database"
+            )
+
+        # Return updated card
+        return HTMLResponse(
+            content=f"""
+            <div id="preset-{preset_id}" style="border: 1px solid #e0e0e0; border-radius: 6px; padding: 20px; margin: 15px 0;">
+                <h3 style="margin-top: 0; color: #0082c9;">{preset["name"]}</h3>
+                <p style="color: #666; margin: 10px 0;">{preset["description"]}</p>
+                <p style="font-size: 13px; color: #999;">
+                    <strong>App:</strong> {preset["app"]} |
+                    <strong>Events:</strong> {len(preset["events"])}
+                </p>
+                <div style="margin-top: 15px; display: flex; align-items: center; gap: 15px;">
+                    <div><span style="color: #999;">Not Enabled</span></div>
+                    <div>
+                        <button
+                            hx-post="/app/webhooks/enable/{preset_id}"
+                            hx-target="#preset-{preset_id}"
+                            hx-swap="outerHTML"
+                            class="button button-primary">
+                            Enable
+                        </button>
+                    </div>
+                </div>
+            </div>
+            """
+        )
+
+    except Exception as e:
+        logger.error(f"Failed to disable preset {preset_id}: {e}", exc_info=True)
+        return HTMLResponse(
+            content=f'<div class="warning">Failed to disable preset: {str(e)}</div>',
+            status_code=500,
+        )
@@ -0,0 +1,257 @@
+import os
+
+import click
+import uvicorn
+
+from nextcloud_mcp_server.config import (
+    get_settings,
+)
+from nextcloud_mcp_server.observability import get_uvicorn_logging_config
+
+from .app import get_app
+
+
+@click.command()
+@click.option(
+    "--host", "-h", default="127.0.0.1", show_default=True, help="Server host"
+)
+@click.option(
+    "--port", "-p", type=int, default=8000, show_default=True, help="Server port"
+)
+@click.option(
+    "--log-level",
+    "-l",
+    default="info",
+    show_default=True,
+    type=click.Choice(["critical", "error", "warning", "info", "debug", "trace"]),
+    help="Logging level",
+)
+@click.option(
+    "--transport",
+    "-t",
+    default="sse",
+    show_default=True,
+    type=click.Choice(["sse", "streamable-http", "http"]),
+    help="MCP transport protocol",
+)
+@click.option(
+    "--enable-app",
+    "-e",
+    multiple=True,
+    type=click.Choice(
+        ["notes", "tables", "webdav", "calendar", "contacts", "cookbook", "deck"]
+    ),
+    help="Enable specific Nextcloud app APIs. Can be specified multiple times. If not specified, all apps are enabled.",
+)
+@click.option(
+    "--oauth/--no-oauth",
+    default=None,
+    help="Force OAuth mode (if enabled) or BasicAuth mode (if disabled). By default, auto-detected based on environment variables.",
+)
+@click.option(
+    "--oauth-client-id",
+    envvar="NEXTCLOUD_OIDC_CLIENT_ID",
+    help="OAuth client ID (can also use NEXTCLOUD_OIDC_CLIENT_ID env var)",
+)
+@click.option(
+    "--oauth-client-secret",
+    envvar="NEXTCLOUD_OIDC_CLIENT_SECRET",
+    help="OAuth client secret (can also use NEXTCLOUD_OIDC_CLIENT_SECRET env var)",
+)
+@click.option(
+    "--mcp-server-url",
+    envvar="NEXTCLOUD_MCP_SERVER_URL",
+    default="http://localhost:8000",
+    show_default=True,
+    help="MCP server URL for OAuth callbacks (can also use NEXTCLOUD_MCP_SERVER_URL env var)",
+)
+@click.option(
+    "--nextcloud-host",
+    envvar="NEXTCLOUD_HOST",
+    help="Nextcloud instance URL (can also use NEXTCLOUD_HOST env var)",
+)
+@click.option(
+    "--nextcloud-username",
+    envvar="NEXTCLOUD_USERNAME",
+    help="Nextcloud username for BasicAuth (can also use NEXTCLOUD_USERNAME env var)",
+)
+@click.option(
+    "--nextcloud-password",
+    envvar="NEXTCLOUD_PASSWORD",
+    help="Nextcloud password for BasicAuth (can also use NEXTCLOUD_PASSWORD env var)",
+)
+@click.option(
+    "--oauth-scopes",
+    envvar="NEXTCLOUD_OIDC_SCOPES",
+    default="openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write",
+    show_default=True,
+    help="OAuth scopes to request during client registration. These define the maximum allowed scopes for the client. Note: Actual supported scopes are discovered dynamically from MCP tools at runtime. (can also use NEXTCLOUD_OIDC_SCOPES env var)",
+)
+@click.option(
+    "--oauth-token-type",
+    envvar="NEXTCLOUD_OIDC_TOKEN_TYPE",
+    default="bearer",
+    show_default=True,
+    type=click.Choice(["bearer", "jwt"], case_sensitive=False),
+    help="OAuth token type (can also use NEXTCLOUD_OIDC_TOKEN_TYPE env var)",
+)
+@click.option(
+    "--public-issuer-url",
+    envvar="NEXTCLOUD_PUBLIC_ISSUER_URL",
+    help="Public issuer URL for OAuth (can also use NEXTCLOUD_PUBLIC_ISSUER_URL env var)",
+)
+def run(
+    host: str,
+    port: int,
+    log_level: str,
+    transport: str,
+    enable_app: tuple[str, ...],
+    oauth: bool | None,
+    oauth_client_id: str | None,
+    oauth_client_secret: str | None,
+    mcp_server_url: str,
+    nextcloud_host: str | None,
+    nextcloud_username: str | None,
+    nextcloud_password: str | None,
+    oauth_scopes: str,
+    oauth_token_type: str,
+    public_issuer_url: str | None,
+):
+    """
+    Run the Nextcloud MCP server.
+
+    \b
+    Authentication Modes:
+      - BasicAuth: Set NEXTCLOUD_USERNAME and NEXTCLOUD_PASSWORD
+      - OAuth: Leave USERNAME/PASSWORD unset (requires OIDC app enabled)
+
+    \b
+    Examples:
+      # BasicAuth mode with CLI options
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com \\
+          --nextcloud-username=admin --nextcloud-password=secret
+
+      # BasicAuth mode with env vars (recommended for credentials)
+      $ export NEXTCLOUD_HOST=https://cloud.example.com
+      $ export NEXTCLOUD_USERNAME=admin
+      $ export NEXTCLOUD_PASSWORD=secret
+      $ nextcloud-mcp-server --host 0.0.0.0 --port 8000
+
+      # OAuth mode with auto-registration
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com --oauth
+
+      # OAuth mode with pre-configured client
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com --oauth \\
+          --oauth-client-id=xxx --oauth-client-secret=yyy
+
+      # OAuth mode with custom scopes and JWT tokens
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com --oauth \\
+          --oauth-scopes="openid notes:read notes:write" --oauth-token-type=jwt
+
+      # OAuth with public issuer URL (for Docker/proxy setups)
+      $ nextcloud-mcp-server --nextcloud-host=http://app --oauth \\
+          --public-issuer-url=http://localhost:8080
+    """
+    # Set env vars from CLI options if provided
+    if nextcloud_host:
+        os.environ["NEXTCLOUD_HOST"] = nextcloud_host
+    if nextcloud_username:
+        os.environ["NEXTCLOUD_USERNAME"] = nextcloud_username
+    if nextcloud_password:
+        os.environ["NEXTCLOUD_PASSWORD"] = nextcloud_password
+    if oauth_client_id:
+        os.environ["NEXTCLOUD_OIDC_CLIENT_ID"] = oauth_client_id
+    if oauth_client_secret:
+        os.environ["NEXTCLOUD_OIDC_CLIENT_SECRET"] = oauth_client_secret
+    if oauth_scopes:
+        os.environ["NEXTCLOUD_OIDC_SCOPES"] = oauth_scopes
+    if oauth_token_type:
+        os.environ["NEXTCLOUD_OIDC_TOKEN_TYPE"] = oauth_token_type
+    if mcp_server_url:
+        os.environ["NEXTCLOUD_MCP_SERVER_URL"] = mcp_server_url
+    if public_issuer_url:
+        os.environ["NEXTCLOUD_PUBLIC_ISSUER_URL"] = public_issuer_url
+
+    # Force OAuth mode if explicitly requested
+    if oauth is True:
+        # Clear username/password to force OAuth mode
+        if "NEXTCLOUD_USERNAME" in os.environ:
+            click.echo(
+                "Warning: --oauth flag set, ignoring NEXTCLOUD_USERNAME", err=True
+            )
+            del os.environ["NEXTCLOUD_USERNAME"]
+        if "NEXTCLOUD_PASSWORD" in os.environ:
+            click.echo(
+                "Warning: --oauth flag set, ignoring NEXTCLOUD_PASSWORD", err=True
+            )
+            del os.environ["NEXTCLOUD_PASSWORD"]
+
+        # Validate OAuth configuration
+        nextcloud_host = os.getenv("NEXTCLOUD_HOST")
+        if not nextcloud_host:
+            raise click.ClickException(
+                "OAuth mode requires NEXTCLOUD_HOST environment variable to be set"
+            )
+
+        # Check if we have client credentials OR if dynamic registration is possible
+        has_client_creds = os.getenv("NEXTCLOUD_OIDC_CLIENT_ID") and os.getenv(
+            "NEXTCLOUD_OIDC_CLIENT_SECRET"
+        )
+
+        if not has_client_creds:
+            # No client credentials - will attempt dynamic registration
+            # Show helpful message before server starts
+            click.echo("", err=True)
+            click.echo("OAuth Configuration:", err=True)
+            click.echo("  Mode: Dynamic Client Registration", err=True)
+            click.echo("  Host: " + nextcloud_host, err=True)
+            click.echo("  Storage: SQLite (TOKEN_STORAGE_DB)", err=True)
+            click.echo("", err=True)
+            click.echo(
+                "Note: Make sure 'Dynamic Client Registration' is enabled", err=True
+            )
+            click.echo("      in your Nextcloud OIDC app settings.", err=True)
+            click.echo("", err=True)
+        else:
+            click.echo("", err=True)
+            click.echo("OAuth Configuration:", err=True)
+            click.echo("  Mode: Pre-configured Client", err=True)
+            click.echo("  Host: " + nextcloud_host, err=True)
+            click.echo(
+                "  Client ID: "
+                + os.getenv("NEXTCLOUD_OIDC_CLIENT_ID", "")[:16]
+                + "...",
+                err=True,
+            )
+            click.echo("", err=True)
+
+    elif oauth is False:
+        # Force BasicAuth mode - verify credentials exist
+        if not os.getenv("NEXTCLOUD_USERNAME") or not os.getenv("NEXTCLOUD_PASSWORD"):
+            raise click.ClickException(
+                "--no-oauth flag set but NEXTCLOUD_USERNAME or NEXTCLOUD_PASSWORD not set"
+            )
+
+    enabled_apps = list(enable_app) if enable_app else None
+
+    app = get_app(transport=transport, enabled_apps=enabled_apps)
+
+    # Get observability settings and create uvicorn logging config
+    settings = get_settings()
+    uvicorn_log_config = get_uvicorn_logging_config(
+        log_format=settings.log_format,
+        log_level=settings.log_level,
+        include_trace_context=settings.log_include_trace_context,
+    )
+
+    uvicorn.run(
+        app=app,
+        host=host,
+        port=port,
+        log_level=log_level,
+        log_config=uvicorn_log_config,
+    )
+
+
+if __name__ == "__main__":
+    run()
@@ -9,6 +9,7 @@ from httpx import (
    BasicAuth,
    Request,
    Response,
+    Timeout,
 )

 from ..controllers.notes_search import NotesSearchController
@@ -22,6 +23,7 @@ from .sharing import SharingClient
 from .tables import TablesClient
 from .users import UsersClient
 from .webdav import WebDAVClient
+from .webhooks import WebhooksClient

 logger = logging.getLogger(__name__)

@@ -66,6 +68,7 @@ class NextcloudClient:
            auth=auth,
            transport=AsyncDisableCookieTransport(AsyncHTTPTransport()),
            event_hooks={"request": [log_request], "response": [log_response]},
+            timeout=Timeout(timeout=30, connect=5),
        )

        # Initialize app clients
@@ -81,6 +84,7 @@ class NextcloudClient:
        self.users = UsersClient(self._client, username)
        self.groups = GroupsClient(self._client, username)
        self.sharing = SharingClient(self._client, username)
+        self.webhooks = WebhooksClient(self._client, username)

        # Initialize controllers
        self._notes_search = NotesSearchController()
@@ -126,10 +130,75 @@ class NextcloudClient:
        all_notes = self.notes.get_all_notes()
        return await self._notes_search.search_notes(all_notes, query)

+    async def find_files_by_tag(
+        self, tag_name: str, mime_type_filter: str | None = None
+    ) -> list[dict]:
+        """Find files by system tag name, optionally filtered by MIME type.
+
+        This method coordinates tag lookup and file retrieval via WebDAV:
+        1. Look up the tag ID by name
+        2. Get all files with that tag (via REPORT with full metadata)
+        3. Optionally filter by MIME type
+
+        Args:
+            tag_name: Name of the system tag to search for (e.g., "vector-index")
+            mime_type_filter: Optional MIME type filter (e.g., "application/pdf")
+
+        Returns:
+            List of file dictionaries with WebDAV properties (path, size, content_type, etc.)
+
+        Raises:
+            RuntimeError: If tag lookup or file query fails
+
+        Examples:
+            # Find all files with "vector-index" tag
+            files = await nc_client.find_files_by_tag("vector-index")
+
+            # Find only PDFs with the tag
+            pdfs = await nc_client.find_files_by_tag("vector-index", "application/pdf")
+        """
+        # Look up tag by name using WebDAV
+        tag = await self.webdav.get_tag_by_name(tag_name)
+        if not tag:
+            logger.debug(f"Tag '{tag_name}' not found, returning empty list")
+            return []
+
+        # Get files with this tag (returns full file info from REPORT)
+        files = await self.webdav.get_files_by_tag(tag["id"])
+        if not files:
+            logger.debug(f"No files found with tag '{tag_name}'")
+            return []
+
+        logger.debug(f"Found {len(files)} files with tag '{tag_name}'")
+
+        # Apply MIME type filter if specified
+        if mime_type_filter:
+            filtered_files = [
+                f
+                for f in files
+                if f.get("content_type", "").startswith(mime_type_filter)
+            ]
+            logger.info(
+                f"Returning {len(filtered_files)} files with tag '{tag_name}' (filtered by {mime_type_filter})"
+            )
+            return filtered_files
+
+        logger.info(f"Returning {len(files)} files with tag '{tag_name}'")
+        return files
+
    def _get_webdav_base_path(self) -> str:
        """Helper to get the base WebDAV path for the authenticated user."""
        return f"/remote.php/dav/files/{self.username}"

+    async def __aenter__(self):
+        """Async context manager entry."""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - closes all clients."""
+        await self.close()
+        return False  # Don't suppress exceptions
+
    async def close(self):
        """Close the HTTP client and CalDAV client."""
        await self._client.aclose()
@@ -5,8 +5,15 @@ import time
 from abc import ABC
 from functools import wraps

+import anyio
 from httpx import AsyncClient, HTTPStatusError, RequestError, codes

+from nextcloud_mcp_server.observability.metrics import (
+    record_nextcloud_api_call,
+    record_nextcloud_api_retry,
+)
+from nextcloud_mcp_server.observability.tracing import trace_nextcloud_api_call
+
 logger = logging.getLogger(__name__)


@@ -38,7 +45,10 @@ def retry_on_429(func):
                    logger.warning(
                        f"429 Client Error: Too Many Requests, Number of attempts: {retries}"
                    )
-                    time.sleep(5)
+                    # Record retry metric (extract app name from args if available)
+                    if len(args) > 0 and hasattr(args[0], "app_name"):
+                        record_nextcloud_api_retry(app=args[0].app_name, reason="429")
+                    await anyio.sleep(5)
                elif e.response.status_code == 404:
                    # 404 errors are often expected (e.g., checking if attachments exist)
                    # Log as debug instead of warning
@@ -72,6 +82,9 @@ def retry_on_429(func):
 class BaseNextcloudClient(ABC):
    """Base class for all Nextcloud app clients."""

+    # Subclasses should set this to identify the app for metrics/tracing
+    app_name: str = "unknown"
+
    def __init__(self, http_client: AsyncClient, username: str):
        """Initialize with shared HTTP client and username.

@@ -88,7 +101,7 @@ class BaseNextcloudClient(ABC):

    @retry_on_429
    async def _make_request(self, method: str, url: str, **kwargs):
-        """Common request wrapper with logging and error handling.
+        """Common request wrapper with logging, tracing, and error handling.

        Args:
            method: HTTP method
@@ -99,6 +112,47 @@ class BaseNextcloudClient(ABC):
            Response object
        """
        logger.debug(f"Making {method} request to {url}")
-        response = await self._client.request(method, url, **kwargs)
-        response.raise_for_status()
-        return response
+
+        # Start timer for metrics
+        start_time = time.time()
+        status_code = 0
+
+        try:
+            # Wrap request in trace span
+            with trace_nextcloud_api_call(
+                app=self.app_name,
+                method=method,
+                path=url,
+            ):
+                response = await self._client.request(method, url, **kwargs)
+                status_code = response.status_code
+                response.raise_for_status()
+
+                # Record successful API call metrics
+                duration = time.time() - start_time
+                record_nextcloud_api_call(
+                    app=self.app_name,
+                    method=method,
+                    status_code=status_code,
+                    duration=duration,
+                )
+
+                return response
+
+        except (HTTPStatusError, RequestError) as e:
+            # Record error metrics
+            if isinstance(e, HTTPStatusError):
+                status_code = e.response.status_code
+            else:
+                status_code = 0  # Connection error, no status code
+
+            duration = time.time() - start_time
+            record_nextcloud_api_call(
+                app=self.app_name,
+                method=method,
+                status_code=status_code,
+                duration=duration,
+            )
+
+            # Re-raise the exception
+            raise
@@ -13,6 +13,8 @@ logger = logging.getLogger(__name__)
 class ContactsClient(BaseNextcloudClient):
    """Client for NextCloud CardDAV contact operations."""

+    app_name = "contacts"
+
    def _get_carddav_base_path(self) -> str:
        """Helper to get the base CardDAV path for contacts."""
        return f"/remote.php/dav/addressbooks/users/{self.username}"
@@ -13,6 +13,8 @@ logger = logging.getLogger(__name__)
 class CookbookClient(BaseNextcloudClient):
    """Client for Nextcloud Cookbook app operations."""

+    app_name = "cookbook"
+
    async def get_version(self) -> Dict[str, Any]:
        """Get Cookbook app and API version."""
        response = await self._make_request("GET", "/apps/cookbook/api/version")
@@ -17,6 +17,8 @@ from nextcloud_mcp_server.models.deck import (
 class DeckClient(BaseNextcloudClient):
    """Client for Nextcloud Deck app operations."""

+    app_name = "deck"
+
    def _get_deck_headers(
        self, additional_headers: Optional[Dict[str, str]] = None
    ) -> Dict[str, str]:
@@ -11,6 +11,8 @@ logger = logging.getLogger(__name__)
 class GroupsClient(BaseNextcloudClient):
    """Client for Nextcloud Groups API operations."""

+    app_name = "groups"
+
    @retry_on_429
    async def search_groups(
        self,
@@ -11,23 +11,64 @@ logger = logging.getLogger(__name__)
 class NotesClient(BaseNextcloudClient):
    """Client for Nextcloud Notes app operations."""

+    app_name = "notes"
+
    async def get_settings(self) -> Dict[str, Any]:
        """Get Notes app settings."""
        response = await self._make_request("GET", "/apps/notes/api/v1/settings")
        return response.json()

-    async def get_all_notes(self) -> AsyncIterator[Dict[str, Any]]:
-        """Get all notes, yielding them one at a time."""
+    async def get_all_notes(
+        self, prune_before: Optional[int] = None
+    ) -> AsyncIterator[Dict[str, Any]]:
+        """Get all notes, yielding them one at a time.
+
+        The Notes API returns changed notes with full data in chunks, and ALL note IDs
+        (with only 'id' field) in the last chunk for deletion detection. This causes
+        duplicates which we handle by tracking seen IDs (first occurrence with full
+        data is kept, later pruned duplicates are skipped).
+
+        Args:
+            prune_before: Optional Unix timestamp. Notes unchanged since this time
+                         are pruned (only 'id' field returned in last chunk).
+                         Reduces data transfer for large note collections.
+
+        Yields:
+            Note dictionaries with full data (deduplicated).
+        """
        cursor = ""
+        seen_ids: set[int] = set()

        while True:
+            params: Dict[str, Any] = {"chunkSize": 100}
+            if cursor:
+                params["chunkCursor"] = cursor
+            if prune_before is not None:
+                params["pruneBefore"] = prune_before
+
            response = await self._make_request(
                "GET",
                "/apps/notes/api/v1/notes",
-                params={"chunkSize": 10, "chunkCursor": cursor},
+                params=params,
            )
-            for note in response.json():
+            response_data = response.json()
+
+            for note in response_data:
+                note_id = note.get("id")
+                if note_id is None:
+                    logger.warning(f"Skipping note without ID: {note}")
+                    continue
+
+                # Skip duplicates (API returns all IDs in last chunk for deletion detection)
+                if note_id in seen_ids:
+                    logger.debug(
+                        f"Skipping duplicate note {note_id} (pruned version in last chunk)"
+                    )
+                    continue
+
+                seen_ids.add(note_id)
                yield note
+
            if "X-Notes-Chunk-Cursor" not in response.headers:
                break
            cursor = response.headers["X-Notes-Chunk-Cursor"]
--- a/Show More
+++ b/Show More