Update README

bump: version 0.48.6 → 0.49.0
Merge pull request #363 from cbcoutinho/feature/news-app-integration
2025-12-08 06:25:21 +00:00 · 2025-12-08 06:23:14 +00:00 · 2025-12-08 07:22:42 +01:00 · 2025-12-08 01:09:02 +01:00 · 2025-12-07 22:36:48 +01:00 · 2025-12-07 13:28:38 +01:00
175 changed files with 30771 additions and 3302 deletions
@@ -5,3 +5,5 @@
 !uv.lock

 !nextcloud_mcp_server/**/*.py
+!nextcloud_mcp_server/**/*.html
+!nextcloud_mcp_server/auth/static/*
@@ -15,17 +15,17 @@ jobs:
      packages: write
    steps:
      - name: Check out
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
        with:
          fetch-depth: 0
          token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
      - name: Create bump and changelog
-        uses: commitizen-tools/commitizen-action@5b0848cd060263e24602d1eba03710e056ef7711 # 0.24.0
+        uses: commitizen-tools/commitizen-action@bb4f1df6601e2a1a891506581b0c53acdc88e07d # 0.26.0
        with:
          github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
          changelog_increment_filename: body.md
      - name: Release
-        uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
+        uses: softprops/action-gh-release@a06a81a03ee405af7f2048a818ed3f03bbf83c7b # v2.5.0
        with:
          body_path: "body.md"
          tag_name: v${{ env.REVISION }}
@@ -0,0 +1,57 @@
+name: Claude Code Review
+
+on:
+  pull_request:
+    types: [opened, synchronize]
+    # Optional: Only run on specific file changes
+    # paths:
+    #   - "src/**/*.ts"
+    #   - "src/**/*.tsx"
+    #   - "src/**/*.js"
+    #   - "src/**/*.jsx"
+
+jobs:
+  claude-review:
+    # Optional: Filter by PR author
+    # if: |
+    #   github.event.pull_request.user.login == 'external-contributor' ||
+    #   github.event.pull_request.user.login == 'new-developer' ||
+    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code Review
+        id: claude-review
+        uses: anthropics/claude-code-action@6337623ebba10cf8c8214b507993f8062fd4ccfb # v1
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          prompt: |
+            REPO: ${{ github.repository }}
+            PR NUMBER: ${{ github.event.pull_request.number }}
+
+            Please review this pull request and provide feedback on:
+            - Code quality and best practices
+            - Potential bugs or issues
+            - Performance considerations
+            - Security concerns
+            - Test coverage
+
+            Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
+
+            Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
+
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
+          claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
+
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude:
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+      actions: read # Required for Claude to read CI results on PRs
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code
+        id: claude
+        uses: anthropics/claude-code-action@6337623ebba10cf8c8214b507993f8062fd4ccfb # v1
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+          # This is an optional setting that allows Claude to read CI results on PRs
+          additional_permissions: |
+            actions: read
+
+          # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+          # prompt: 'Update the pull request description to include a summary of changes.'
+
+          # Optional: Add claude_args to customize behavior and configuration
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
+          # claude_args: '--allowed-tools Bash(gh pr:*)'
+
@@ -12,11 +12,11 @@ jobs:
      packages: write
    steps:
      - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5
+        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5
        with:
          # list of Docker images to use as base name for tags
          images: |
@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
        with:
          fetch-depth: 0

@@ -0,0 +1,105 @@
+name: RAG Evaluation
+
+on:
+  workflow_dispatch:
+    inputs:
+      manual_path:
+        description: 'Path to Nextcloud User Manual PDF in Nextcloud'
+        required: false
+        default: 'Nextcloud Manual.pdf'
+      embedding_model:
+        description: 'OpenAI embedding model'
+        required: false
+        default: 'openai/text-embedding-3-small'
+      generation_model:
+        description: 'OpenAI generation model'
+        required: false
+        default: 'openai/gpt-4o-mini'
+
+jobs:
+  rag-evaluation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      models: read
+
+    steps:
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
+
+      - name: Run docker compose with vector sync
+        uses: hoverkraft-tech/compose-action@248470ecc5ed40d8ed3d4480d8260d77179ef579 # v2.4.2
+        with:
+          compose-file: |
+            ./docker-compose.yml
+            ./docker-compose.ci.yml
+          up-flags: "--build"
+        env:
+          # Environment variables passed to docker-compose.ci.yml
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+          VECTOR_SYNC_SCAN_INTERVAL: "5"
+
+      - name: Install the latest version of uv
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
+
+      - name: Wait for Nextcloud to be ready
+        run: |
+          echo "Waiting for Nextcloud..."
+          max_attempts=60
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "Service did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
+            sleep 5
+          done
+          echo "Nextcloud is ready."
+
+      - name: Wait for MCP server to be ready
+        run: |
+          echo "Waiting for MCP server..."
+          max_attempts=30
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8000/health/live | grep -q "200"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "MCP server did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: MCP not ready, sleeping for 2 seconds..."
+            sleep 2
+          done
+          echo "MCP server is ready."
+
+      - name: Run RAG evaluation tests
+        env:
+          NEXTCLOUD_HOST: "http://localhost:8080"
+          NEXTCLOUD_USERNAME: "admin"
+          NEXTCLOUD_PASSWORD: "admin"
+          RAG_MANUAL_PATH: ${{ inputs.manual_path }}
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+        run: |
+          uv run pytest tests/integration/test_rag.py -v --log-cli-level=INFO --provider openai --junitxml=pytest-results.xml
+
+      - name: Capture MCP container logs
+        if: always()
+        run: |
+          echo "=== MCP Container Logs ==="
+          docker compose logs mcp --tail=500
+
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5
+        with:
+          name: rag-evaluation-results
+          path: |
+            pytest-results.xml
+          retention-days: 30
@@ -18,9 +18,9 @@ jobs:
      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
      - name: Install uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
      - name: Install Python 3.11
        run: uv python install 3.11
      - name: Build
@@ -9,9 +9,9 @@ jobs:
  linting:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
      - name: Check format
        run: |
          uv run --frozen ruff format --diff
@@ -27,7 +27,7 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
        with:
          submodules: 'true'

@@ -35,7 +35,7 @@ jobs:
      ###### Required to build OIDC App ######

      - name: Set up php 8.4
-        uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
+        uses: shivammathur/setup-php@44454db4f0199b8b9685a5d763dc37cbf79108e1 # v2
        with:
          php-version: 8.4
          coverage: none
@@ -49,14 +49,14 @@ jobs:


      - name: Run docker compose
-        uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
+        uses: hoverkraft-tech/compose-action@248470ecc5ed40d8ed3d4480d8260d77179ef579 # v2.4.2
        with:
          compose-file: "./docker-compose.yml"
          #compose-flags: "--profile qdrant"
          up-flags: "--build"

      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4

      - name: Install Playwright dependencies
        run: |
@@ -85,4 +85,4 @@ jobs:
          NEXTCLOUD_USERNAME: "admin"
          NEXTCLOUD_PASSWORD: "admin"
        run: |
-          uv run pytest -v --log-cli-level=WARN --ignore=tests/manual
+          uv run pytest -v --log-cli-level=WARN -m "unit or smoke"
@@ -5,5 +5,14 @@ __pycache__/
 .env.local
 .env.*.local

+# Git
+worktrees/
+
+docker-compose.override.yml
+
 # Generated by pytest used to login users
 .nextcloud_oauth_*.json
+.playwright-mcp/
+
+# RAG Evaluation
+tests/rag_evaluation/fixtures/
@@ -1,6 +1,6 @@
-[submodule "oidc"]
-	path = third_party/oidc
-	url = https://github.com/cbcoutinho/oidc
 [submodule "third_party/oidc"]
 	path = third_party/oidc
 	url = https://github.com/cbcoutinho/oidc
+[submodule "third_party/notes"]
+	path = third_party/notes
+	url = https://github.com/cbcoutinho/notes
@@ -1,3 +1,355 @@
+## v0.49.0 (2025-12-08)
+
+### Feat
+
+- **news**: add Nextcloud News app integration
+
+### Fix
+
+- resolve all type checking errors (8 errors fixed)
+
+### Refactor
+
+- **news**: simplify vector sync to fetch all items
+
+### Perf
+
+- **news**: use direct API endpoint for get_item()
+
+## v0.48.6 (2025-12-03)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.23,<1.24
+
+## v0.48.5 (2025-11-28)
+
+### Fix
+
+- **deps**: update dependency pillow to v12
+
+## v0.48.4 (2025-11-23)
+
+### Fix
+
+- Add rate limit retry logic to OpenAI provider
+
+## v0.48.3 (2025-11-23)
+
+### Fix
+
+- Increase MCP sampling timeout to 5 minutes for slower LLMs
+
+## v0.48.2 (2025-11-23)
+
+### Fix
+
+- Share vector sync state with FastMCP session lifespan via module singleton
+- Share vector sync state with FastMCP session lifespan via module singleton
+
+## v0.48.1 (2025-11-23)
+
+### Fix
+
+- Use WebDAV for tag creation and add LLM-as-a-judge for RAG tests
+
+### Refactor
+
+- Move background tasks to server lifespan and deprecate SSE transport
+
+## v0.48.0 (2025-11-23)
+
+### Feat
+
+- Add tag management methods to WebDAV client
+
+## v0.47.0 (2025-11-23)
+
+### Feat
+
+- Add OpenAI provider support for embeddings and generation
+
+## v0.46.2 (2025-11-22)
+
+### Fix
+
+- **smithery**: Enable JSON response format for scanner compatibility
+
+## v0.46.1 (2025-11-22)
+
+### Perf
+
+- Optimize vector viz search performance
+
+## v0.46.0 (2025-11-22)
+
+### Feat
+
+- Add Smithery CLI deployment support
+- Implement ADR-016 Smithery stateless deployment mode
+
+### Fix
+
+- **smithery**: Add JSON Schema metadata to mcp-config endpoint
+- **smithery**: Use container runtime pattern for config discovery
+- Add Smithery lifespan and auth mode detection
+
+## v0.45.0 (2025-11-22)
+
+### Feat
+
+- Add context expansion to semantic search with chunk overlap removal
+- Use Ollama native batch API in embed_batch()
+- Implement Qdrant placeholder state management
+- Switch files to use numeric IDs with file_path resolution
+- Implement per-chunk vector visualization with context expansion
+
+### Fix
+
+- Use alpha_composite for proper RGBA highlight blending
+- Remove pymupdf.layout.activate() to fix page_chunks behavior
+- Centralize PDF processing and generate separate images per chunk
+- Set is_placeholder=False in processor to fix search filtering
+- Increase placeholder staleness threshold to 5x scan interval
+- Add placeholder staleness check to prevent duplicate processing
+- Use empty SparseVector instead of None for placeholders
+- Return empty array instead of null for query_coords when no results
+- Align PDF text extraction between indexing and context expansion
+- Update models and viz to use int-only doc_id
+- Reconstruct full content for notes to match indexed offsets
+- Add async/await, PDF metadata, and type safety fixes
+
+### Refactor
+
+- Simplify PDF text extraction with single to_markdown call
+
+### Perf
+
+- Optimize PDF processing with parallel extraction and single-render highlights
+
+## v0.44.1 (2025-11-21)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.22,<1.23
+
+## v0.44.0 (2025-11-19)
+
+### Feat
+
+- Improve vector visualization with static assets and fixes
+- Redesign UI to match Nextcloud ecosystem aesthetic
+
+### Fix
+
+- Improve 3D plot rendering with explicit dimensions and window resize support
+- Preserve 3D plot camera and improve documentation
+- Preserve 3D plot camera position and fix CSS loading
+
+## v0.43.0 (2025-11-18)
+
+### Feat
+
+- Replace custom document chunker with LangChain MarkdownTextSplitter
+
+## v0.42.0 (2025-11-17)
+
+### Feat
+
+- **viz**: Add dual-score display and improve UI controls
+
+## v0.41.0 (2025-11-17)
+
+### Feat
+
+- add configurable fusion algorithms for BM25 hybrid search
+- add chunk position tracking to vector indexing and search
+- add vector viz template and chunk context endpoint
+
+### Fix
+
+- prevent infinite loop in DocumentChunker with position tracking
+- Relax SearchResult validation to support DBSF fusion scores > 1.0
+
+## v0.40.0 (2025-11-16)
+
+### Feat
+
+- add unified provider architecture with Amazon Bedrock support
+
+### Fix
+
+- suppress Starlette middleware type warnings in ty checker
+
+## v0.39.0 (2025-11-16)
+
+### Feat
+
+- Implement BM25 hybrid search with native Qdrant RRF fusion
+
+### Fix
+
+- Handle named vectors in visualization and semantic search
+- Update vizApp to use bm25_hybrid algorithm and remove deprecated weights
+- Update viz routes to use BM25 hybrid search after refactor
+
+## v0.38.0 (2025-11-16)
+
+### Feat
+
+- add concurrent uploads and --force flag to upload command
+- implement RAG evaluation framework with CLI tooling
+
+### Fix
+
+- download qrels from BEIR ZIP instead of HuggingFace
+
+### Refactor
+
+- migrate asyncio to anyio for consistent structured concurrency
+- replace httpx client with NextcloudClient in upload command
+
+### Perf
+
+- Eliminate double-fetching in semantic search sampling
+- fix vector viz search performance and visual encoding
+- make note deletion concurrent in upload --force
+
+## v0.37.0 (2025-11-16)
+
+### Feat
+
+- Add OpenTelemetry tracing to @instrument_tool decorator
+
+## v0.36.0 (2025-11-15)
+
+### BREAKING CHANGE
+
+- Search algorithms now require Qdrant to be populated.
+Vector sync must be enabled and documents indexed for search to work.
+
+### Feat
+
+- Normalize hybrid search RRF scores to 0-1 range
+- Enhance vector visualization UI and parallelize search verification
+- Add Vector Viz tab to app home page
+- Add vector visualization pane with multi-select document types
+- Implement custom PCA to remove sklearn dependency
+- Add multi-document Protocol with cross-app search support
+- Update nc_semantic_search tool with algorithm selection
+- Implement unified search algorithm module
+
+### Fix
+
+- Reorder tabs and fix viz pane session access
+
+### Refactor
+
+- Optimize Nextcloud access verification with centralized filtering
+- Make all search algorithms query Qdrant payload, not Nextcloud
+
+### Perf
+
+- Exclude vector-sync status polling from distributed tracing
+
+## v0.35.0 (2025-11-15)
+
+### Feat
+
+- Enable SSE transport for mcp service and update test fixtures
+
+## v0.34.2 (2025-11-13)
+
+### Fix
+
+- Use NEXTCLOUD_OIDC_CLIENT_ID/SECRET env vars consistently
+
+## v0.34.1 (2025-11-13)
+
+### Fix
+
+- return all notes when search query is empty
+
+## v0.34.0 (2025-11-13)
+
+### Feat
+
+- Complete Phase 5 - Instrument all 93 MCP tools
+- Add instrumentation decorator and apply to notes tools (Phase 5)
+- Add OAuth token and database metrics (Phases 3-4)
+- Add metrics instrumentation for queue, health, and database operations
+
+## v0.33.1 (2025-11-13)
+
+### Fix
+
+- Move grafana_folder from labels to annotations
+
+## v0.33.0 (2025-11-13)
+
+### Feat
+
+- Add Grafana dashboard and vector sync metric instrumentation
+
+## v0.32.1 (2025-11-12)
+
+### Fix
+
+- add dynamic dimension detection for Ollama embedding models
+
+## v0.32.0 (2025-11-11)
+
+### Feat
+
+- **ollama**: Pull model on startup if not available in ollama
+- add dynamic vector sync status updates with htmx polling
+- add webhook management UI and BeforeNodeDeletedEvent support
+- validate Nextcloud webhook schemas and document findings
+
+### Fix
+
+- improve webapp tab UI with CSS Grid and viewport-filling container
+
+### Refactor
+
+- move webapp from /user/page to /app
+- consolidate database storage for webhooks and OAuth tokens
+
+## v0.31.1 (2025-11-10)
+
+### Refactor
+
+- simplify OpenTelemetry tracing configuration
+
+## v0.31.0 (2025-11-10)
+
+### Feat
+
+- skip tracing for health and metrics endpoints
+
+### Fix
+
+- add retry logic for ETag conflicts in category change test
+- optimize Notes API pagination with pruneBefore parameter
+
+## v0.30.0 (2025-11-10)
+
+### Feat
+
+- **helm**: Add document chunking configuration
+- **vector**: Add configurable chunk size and overlap for document embedding
+- **vector**: Support multiple embedding models with auto-generated collection names
+
+### Fix
+
+- Support in-memory Qdrant for CI testing
+
+## v0.29.2 (2025-11-09)
+
+### Fix
+
+- **helm**: Set default strategy to Recreate
+
 ## v0.29.1 (2025-11-09)

 ### Fix
@@ -51,7 +403,7 @@
 - implement ADR-009 - refactor semantic search to use generic semantic:read scope
 - implement MCP sampling for semantic search RAG (ADR-008)
 - add optional vector database and semantic search to helm chart
- add vector sync processing status to /user/page endpoint
+- add vector sync processing status to /app endpoint
 - implement semantic search tool and fix vector sync issues (ADR-007 Phase 3)
 - implement vector sync scanner and processor (ADR-007 Phase 2)

@@ -5,23 +5,29 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 ## Coding Conventions

 ### async/await Patterns
- **Use anyio + asyncio hybrid** - Both libraries are available
+- **Use anyio for all async operations** - Provides structured concurrency
  - pytest runs in `anyio` mode (`anyio_mode = "auto"` in pyproject.toml)
-  - asyncio used in auth modules (refresh_token_storage.py, token_exchange.py, token_broker.py)
-  - anyio used in calendar.py, client_registration.py, app.py
+  - Use `anyio.create_task_group()` for concurrent execution (NOT `asyncio.gather()`)
+  - Use `anyio.Lock()` for synchronization primitives (NOT `asyncio.Lock()`)
+  - Use `anyio.run()` for entry points (NOT `asyncio.run()`)
  - Prefer standard async/await syntax without explicit library imports when possible
+  - Examples: app.py, search/hybrid.py, search/verification.py, auth/token_broker.py

 ### Type Hints
 - **Use Python 3.10+ union syntax**: `str | None` instead of `Optional[str]`
 - **Use lowercase generics**: `dict[str, Any]` instead of `Dict[str, Any]`
 - **Type all function signatures** - Parameters and return types
- **No explicit type checker configured** - Ruff handles linting only
+- **Type checker**: `ty` is configured for static type checking
+  ```bash
+  uv run ty check -- nextcloud_mcp_server
+  ```

 ### Code Quality
- **Run ruff before committing**:
+- **Run ruff and ty before committing**:
  ```bash
  uv run ruff check
  uv run ruff format
+  uv run ty check -- nextcloud_mcp_server
  ```
 - **Ruff configuration** in pyproject.toml (extends select: ["I"] for import sorting)

@@ -55,8 +61,60 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 - `nextcloud_mcp_server/server/` - MCP tool/resource definitions
 - `nextcloud_mcp_server/auth/` - OAuth/OIDC authentication
 - `nextcloud_mcp_server/models/` - Pydantic response models
+- `nextcloud_mcp_server/providers/` - Unified LLM provider infrastructure (embeddings + generation)
 - `tests/` - Layered test suite (unit, smoke, integration, load)

+### Provider Architecture (ADR-015)
+
+**Unified Provider System** for embeddings and text generation:
+
+**Location:** `nextcloud_mcp_server/providers/`
+- `base.py` - `Provider` ABC with optional capabilities
+- `registry.py` - Auto-detection and factory pattern
+- `ollama.py` - Ollama provider (embeddings + generation)
+- `anthropic.py` - Anthropic provider (generation only)
+- `bedrock.py` - Amazon Bedrock provider (embeddings + generation)
+- `simple.py` - Simple in-memory provider (embeddings only, fallback)
+
+**Usage:**
+```python
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Auto-detects from environment
+
+# Check capabilities
+if provider.supports_embeddings:
+    embeddings = await provider.embed_batch(texts)
+
+if provider.supports_generation:
+    text = await provider.generate("prompt", max_tokens=500)
+```
+
+**Environment Variables:**
+
+Bedrock:
+- `AWS_REGION` - AWS region (e.g., "us-east-1")
+- `BEDROCK_EMBEDDING_MODEL` - Embedding model ID (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL` - Generation model ID (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` - Optional, uses AWS credential chain
+
+Ollama:
+- `OLLAMA_BASE_URL` - API URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL` - Embedding model (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL` - Generation model (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL` - SSL verification (default: "true")
+
+Simple (fallback, no config needed):
+- `SIMPLE_EMBEDDING_DIMENSION` - Dimension (default: 384)
+
+**Auto-Detection Priority:** Bedrock → Ollama → Simple
+
+**Backward Compatibility:**
+- Old code using `nextcloud_mcp_server.embedding.get_embedding_service()` still works
+- `EmbeddingService` now wraps `get_provider()` internally
+
+**For Details:** See `docs/ADR-015-unified-provider-architecture.md`
+
 ## Development Commands (Quick Reference)

 ### Testing
@@ -1,17 +1,24 @@
-FROM ghcr.io/astral-sh/uv:0.9.8-python3.11-alpine@sha256:6c842c49ad032f46b62f32a7e7779f45f12671a8e0d82ea24c766ab62d58b396
+FROM docker.io/library/python:3.12-slim-trixie@sha256:b43ff04d5df04ad5cabb80890b7ef74e8410e3395b19af970dcd52d7a4bff921
+
+COPY --from=ghcr.io/astral-sh/uv:0.9.16@sha256:ae9ff79d095a61faf534a882ad6378e8159d2ce322691153d68d2afac7422840 /uv /uvx /bin/

 # Install dependencies
 # 1. git (required for caldav dependency from git)
 # 2. sqlite for development with token db
-RUN apk add --no-cache git sqlite
+RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y \
+    git \
+    tesseract-ocr \
+    sqlite3 && rm -rf /var/lib/apt/lists/*

 WORKDIR /app

 COPY . .

-RUN uv sync --locked --no-dev
+RUN uv sync --locked --no-dev --no-editable --no-cache

 ENV PYTHONUNBUFFERED=1
 ENV VIRTUAL_ENV=/app/.venv
+ENV PATH=/app/.venv/bin:$PATH
+ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

 ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
@@ -0,0 +1,44 @@
+# Dockerfile for Smithery stateless deployment
+# ADR-016: Stateless mode for multi-user public Nextcloud instances
+#
+# This image excludes:
+# - Vector database dependencies (qdrant-client)
+# - Background sync workers
+# - Admin UI routes (/app)
+# - Semantic search tools
+#
+# Features included:
+# - Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
+# - Per-session app password authentication
+# - Multi-user support via Smithery session config
+
+FROM docker.io/library/python:3.12-slim-trixie@sha256:b43ff04d5df04ad5cabb80890b7ef74e8410e3395b19af970dcd52d7a4bff921
+
+WORKDIR /app
+
+# Install uv for fast dependency management
+COPY --from=ghcr.io/astral-sh/uv:0.9.16@sha256:ae9ff79d095a61faf534a882ad6378e8159d2ce322691153d68d2afac7422840 /uv /uvx /bin/
+
+# Install dependencies
+# 1. git (required for caldav dependency from git)
+# NOTE(review): sqlite is not installed in this image (only git) - confirm the stateless mode needs no token db
+RUN apt-get update && apt-get install --no-install-recommends --no-install-suggests -y \
+    git && rm -rf /var/lib/apt/lists/*
+
+# Copy project files
+COPY . .
+
+RUN uv sync --locked --no-dev --no-editable --no-cache
+
+# Set Smithery mode environment variables
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081 by default
+EXPOSE 8081
+
+# Health check endpoint (uses the venv interpreter directly so a probe never triggers a `uv run` environment sync)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD /app/.venv/bin/python -c "import httpx; httpx.get('http://localhost:${PORT:-8081}/health/live').raise_for_status()"
+
+CMD ["/app/.venv/bin/smithery-main"]
@@ -1,5 +1,10 @@
+<p align="center">
+  <img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128">
+</p>
+
 # Nextcloud MCP Server

+[![smithery badge](https://smithery.ai/badge/@cbcoutinho/nextcloud-mcp-server)](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
 [![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)

 **A production-ready MCP server that connects AI assistants to your Nextcloud instance.**
@@ -13,7 +18,20 @@ This is a **dedicated standalone MCP server** designed for external MCP clients

 ## Quick Start

-Get up and running in 60 seconds using Docker:
+The fastest way to get started is via [Smithery](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server) - no Docker or self-hosting required:
+
+1. Visit the [Smithery marketplace page](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
+2. Click "Deploy" and configure:
+   - **Nextcloud URL**: Your Nextcloud instance (e.g., `https://cloud.example.com`)
+   - **Username**: Your Nextcloud username
+   - **App Password**: Generate one in Nextcloud → Settings → Security → Devices & sessions
+
+> [!NOTE]
+> Smithery runs in stateless mode without semantic search. For full features, use [Docker](#docker-self-hosted) or see [ADR-016](docs/ADR-016-smithery-stateless-deployment.md).
+
+## Docker (Self-Hosted)
+
+For full features including semantic search, run with Docker:

 ```bash
 # 1. Create a minimal configuration
@@ -29,10 +47,15 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \

 # 3. Test the connection
 curl http://127.0.0.1:8000/health/ready
+
+# 4. Point your MCP client at the SSE endpoint:
+#    http://127.0.0.1:8000/sse
+
+# Or, if the server was started with --transport streamable-http:
+#    http://127.0.0.1:8000/mcp
 ```

 **Next Steps:**
- Create an app password in Nextcloud: Settings → Security → Devices & sessions
 - Connect your MCP client (Claude Desktop, IDEs, `mcp dev`, etc.)
 - See [docs/installation.md](docs/installation.md) for other deployment options (local, Kubernetes)

@@ -123,6 +146,7 @@ This enables natural language queries and helps discover related content across
 - **[App Documentation](docs/)** - Notes, Calendar, Contacts, WebDAV, Deck, Cookbook, Tables
 - **[Document Processing](docs/configuration.md#document-processing)** - OCR and text extraction setup
 - **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes only, opt-in)
+- **[Vector Sync UI Guide](docs/user-guide/vector-sync-ui.md)** - Browser interface for semantic search visualization and testing

 ### Advanced Topics
 - **[OAuth Architecture](docs/oauth-architecture.md)** - How OAuth works (experimental)
@@ -198,4 +222,4 @@ This project is licensed under the AGPL-3.0 License. See [LICENSE](./LICENSE) fo

 - [Model Context Protocol](https://github.com/modelcontextprotocol)
 - [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
- [Nextcloud](https://nextcloud.com/)
+- [Nextcloud](https://nextcloud.com/)
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+set -euox pipefail
+
+php /var/www/html/occ app:enable news
@@ -2,4 +2,30 @@

 set -euox pipefail

-php /var/www/html/occ app:enable notes
+# Enable the notes app, choosing the source in this order:
+#   1. a development checkout mounted at /opt/apps/notes (symlinked into custom_apps),
+#   2. a copy already present in custom_apps,
+#   3. a fresh install from the app store.
+echo "Installing and configuring notes app for testing..."
+
+# Case 1: a development notes app is mounted at /opt/apps/notes
+if [ -d /opt/apps/notes ]; then
+    echo "Development notes app found at /opt/apps/notes"
+
+    # Remove whatever currently sits at custom_apps/notes (app-store copy or
+    # old symlink) so the symlink below is created at that exact path
+    if [ -e /var/www/html/custom_apps/notes ]; then
+        echo "Removing existing notes in apps..."
+        rm -rf /var/www/html/custom_apps/notes
+    fi
+
+    # Link custom_apps/notes to the mounted development version
+    # Per Nextcloud docs: apps outside server root need symlinks in server root
+    echo "Creating symlink: custom_apps/notes -> /opt/apps/notes"
+    ln -sf /opt/apps/notes /var/www/html/custom_apps/notes
+
+    echo "Enabling notes app from /opt/apps (development mode via symlink)"
+    php /var/www/html/occ app:enable notes
+# Case 2: notes already exists under custom_apps; just enable it
+elif [ -d /var/www/html/custom_apps/notes ]; then
+    echo "notes app directory found in apps (already installed)"
+    php /var/www/html/occ app:enable notes
+# Case 3: nothing found locally; install from the app store, then enable
+else
+    echo "notes app not found, installing from app store..."
+    php /var/www/html/occ app:install notes
+    php /var/www/html/occ app:enable notes
+fi
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512">
+  <rect width="512" height="512" rx="80" ry="80" fill="#0082C9"/>
+  <path d="M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 
15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z" fill="#fff"/>
+</svg>
@@ -1,9 +1,9 @@
 dependencies:
 - name: qdrant
  repository: https://qdrant.github.io/qdrant-helm
-  version: 1.15.5
+  version: 1.16.2
 - name: ollama
  repository: https://otwld.github.io/ollama-helm
-  version: 1.34.0
-digest: sha256:d51c97d05be2614b751c0dd7267ef7dc959eff5ebef859c5f895c5c554b7a874
-generated: "2025-11-09T17:08:02.86648061Z"
+  version: 1.35.0
+digest: sha256:bcb0779739e4710b90bb65f6a7baeaa295bd0ba9776f8a1cf8d9b69d233c8ec0
+generated: "2025-12-05T11:11:27.999374001Z"
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.29.1
-appVersion: "0.29.1"
+version: 0.49.0
+appVersion: "0.49.0"
 keywords:
  - nextcloud
  - mcp
@@ -21,12 +21,16 @@ home: https://github.com/cbcoutinho/nextcloud-mcp-server
 sources:
  - https://github.com/cbcoutinho/nextcloud-mcp-server
 icon: https://raw.githubusercontent.com/nextcloud/server/master/core/img/logo/logo.svg
+annotations:
+  # Grafana dashboard support
+  grafana_dashboard: "true"
+  grafana_dashboard_folder: "Nextcloud MCP"
 dependencies:
  - name: qdrant
-    version: "1.15.5"
+    version: "1.16.2"
    repository: https://qdrant.github.io/qdrant-helm
    condition: qdrant.networkMode.deploySubchart
  - name: ollama
-    version: "1.34.0"
+    version: "1.35.0"
    repository: https://otwld.github.io/ollama-helm
    condition: ollama.enabled
@@ -280,6 +280,72 @@ Use OpenAI or any OpenAI-compatible API instead of Ollama.
 | `openai.secretKey` | Key in secret containing API key | `api-key` |
 | `openai.baseUrl` | Custom API endpoint (optional) | `""` |

+#### Observability & Monitoring
+
+The chart provides comprehensive observability features: Prometheus metrics, OpenTelemetry tracing, and Grafana dashboards.
+
+**Metrics Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.metrics.enabled` | Enable Prometheus metrics | `true` |
+| `observability.metrics.port` | Metrics port | `9090` |
+| `observability.metrics.path` | Metrics endpoint path | `/metrics` |
+
+**Tracing Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.tracing.enabled` | Enable OpenTelemetry tracing | `false` |
+| `observability.tracing.endpoint` | OTLP collector endpoint | `""` |
+| `observability.tracing.serviceName` | Service name in traces | `nextcloud-mcp-server` |
+| `observability.tracing.samplingRate` | Trace sampling rate (0.0-1.0) | `1.0` |
+
+**Logging Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.logging.format` | Log format (json or text) | `json` |
+| `observability.logging.level` | Log level | `INFO` |
+| `observability.logging.includeTraceContext` | Include trace IDs in logs | `true` |
+
+**ServiceMonitor (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `serviceMonitor.enabled` | Create ServiceMonitor resource | `false` |
+| `serviceMonitor.interval` | Scrape interval | `30s` |
+| `serviceMonitor.scrapeTimeout` | Scrape timeout | `10s` |
+| `serviceMonitor.labels` | Additional labels for ServiceMonitor | `{}` |
+
+**PrometheusRule (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `prometheusRule.enabled` | Create PrometheusRule with alert rules | `false` |
+| `prometheusRule.labels` | Additional labels for PrometheusRule | `{}` |
+
+**Grafana Dashboards:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `dashboards.enabled` | Enable automatic dashboard provisioning | `false` |
+| `dashboards.grafanaFolder` | Grafana folder name for dashboards | `Nextcloud MCP` |
+| `dashboards.labels` | Additional labels for dashboard ConfigMap | `{}` |
+| `dashboards.annotations` | Additional annotations for dashboard ConfigMap | `{}` |
+
+When `dashboards.enabled` is `true`, a ConfigMap with the Grafana dashboard is created with the `grafana_dashboard: "1"` label. This enables automatic discovery by Grafana sidecar containers (commonly used with kube-prometheus-stack).
+
+The dashboard provides comprehensive monitoring including:
+- HTTP request metrics (RED pattern: Rate, Errors, Duration)
+- MCP tool performance and errors
+- Nextcloud API performance by app (notes, calendar, contacts, etc.)
+- OAuth token operations and cache hit rates
+- External dependency health (Nextcloud, Qdrant, Keycloak, Unstructured API)
+- Vector sync processing pipeline (when enabled)
+
+For manual import or more details, see `charts/nextcloud-mcp-server/dashboards/README.md`.
+
 ## Examples

 ### Example 1: Basic Auth with Ingress
@@ -6,14 +6,57 @@ This directory contains example Grafana dashboards for monitoring the Nextcloud

 ### nextcloud-mcp-server.json

-Comprehensive dashboard with the following panels:
+All-in-one Operations Dashboard with comprehensive monitoring across all system components.

- **Request Rate**: HTTP requests per second by method and endpoint
- **Error Rate**: Percentage of 5xx errors
- **Request Latency**: P50 and P95 latency by endpoint
- **Top MCP Tools**: Most frequently called tools
- **Nextcloud API Latency**: API call latency by app (notes, calendar, etc.)
- **Vector Sync Queue**: Queue size for background document processing
+#### Overview Row
+High-level metrics for quick health assessment:
+- **Request Rate** (stat): Total requests per second
+- **Error Rate** (stat): Percentage of 5xx errors with color thresholds
+- **P95 Latency** (stat): 95th percentile request latency
+- **Active Requests** (stat): Current in-flight requests
+
+#### HTTP Metrics (RED Pattern)
+Core request/error/duration metrics:
+- **Request Rate by Endpoint** (timeseries): RPS breakdown by endpoint
+- **Error Rate by Status Code** (timeseries): Error rates for 4xx/5xx codes
+- **Latency Percentiles** (timeseries): P50, P95, P99 latency trends
+- **Status Code Distribution** (piechart): Percentage breakdown of all status codes
+
+#### MCP Tools Row
+MCP-specific tool performance:
+- **Top Tools by Call Volume** (bargauge): Top 10 most-called tools
+- **Tool Error Rate** (timeseries): Error rates per tool
+- **Tool Execution Duration** (timeseries): P95 latency by tool
+
+#### Nextcloud API Row
+Backend API performance metrics:
+- **API Calls by App** (timeseries): Request rate per Nextcloud app (notes, calendar, contacts, etc.)
+- **API Latency by App** (timeseries): P95 latency per app
+- **API Retries by Reason** (timeseries): Retry patterns (429, timeout, connection errors)
+- **API Error Rate** (stat): Overall API error percentage
+
+#### OAuth & Authentication Row
+OAuth token operations and caching:
+- **Token Validations** (timeseries): Success/failure rates for token validation
+- **Token Exchange Operations** (timeseries): RFC 8693 token exchange operations
+- **Token Cache Hit Rate** (stat): Percentage of cache hits (color-coded: red<50%, yellow<80%, green≥80%)
+- **Refresh Token Operations** (timeseries): Refresh token storage operations by type
+
+#### Dependencies & Health Row
+External dependency status monitoring:
+- **Nextcloud Health** (stat): UP/DOWN status with color coding
+- **Qdrant Health** (stat): Vector database health status
+- **Keycloak Health** (stat): Identity provider health status
+- **Unstructured API Health** (stat): Document processing API status
+- **Health Check Duration** (timeseries): Health check latency by dependency
+- **Database Operation Latency** (timeseries): P95 latency for DB operations (SQLite, Qdrant)
+
+#### Vector Sync Row (when enabled)
+Document processing pipeline metrics:
+- **Documents Processed Rate** (timeseries): Processing throughput by status (success/failure)
+- **Processing Queue Depth** (gauge): Current queue size with thresholds (yellow>50, red>100)
+- **Qdrant Operations** (timeseries): Vector database operations by type
+- **Document Processing Duration** (timeseries): P95 processing latency

 ## Importing to Grafana

@@ -25,49 +68,77 @@ Comprehensive dashboard with the following panels:
 4. Select your Prometheus data source
 5. Click "Import"

-### Automated Import (Kubernetes)
+### Automated Import (Helm Chart)

-If using the Grafana Operator or kube-prometheus-stack, you can create a ConfigMap:
+The Helm chart now supports automatic dashboard provisioning via the Grafana sidecar pattern.
+
+#### Option 1: Using Helm Chart (Recommended)
+
+Enable dashboard provisioning in your Helm values:
+
+```yaml
+# values.yaml for nextcloud-mcp-server chart
+dashboards:
+  enabled: true
+  grafanaFolder: "Nextcloud MCP"  # Folder name in Grafana
+  labels: {}  # Additional labels if needed
+```
+
+Then deploy or upgrade:

 ```bash
-kubectl create configmap nextcloud-mcp-dashboards \
+helm upgrade --install nextcloud-mcp nextcloud-mcp-server \
+  --set dashboards.enabled=true
+```
+
+The dashboard will be automatically imported by Grafana if the sidecar is configured
+to watch for ConfigMaps with label `grafana_dashboard: "1"`.
+
+#### Option 2: Using kube-prometheus-stack
+
+If using kube-prometheus-stack with Grafana sidecar enabled, the dashboard will be
+automatically discovered and imported. Ensure your Grafana deployment has:
+
+```yaml
+# kube-prometheus-stack values
+grafana:
+  sidecar:
+    dashboards:
+      enabled: true
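+      label: grafana_dashboard
+      # Required for the sidecar to honor the grafana_folder annotation
+      folderAnnotation: grafana_folder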
+      folder: /tmp/dashboards
+      provider:
+        foldersFromFilesStructure: true
+```
+
+#### Option 3: Manual ConfigMap Creation
+
+For other Grafana setups, create a ConfigMap manually:
+
+```bash
+kubectl create configmap nextcloud-mcp-dashboard \
  --from-file=nextcloud-mcp-server.json \
  -n monitoring

-# Add label for Grafana sidecar to discover
-kubectl label configmap nextcloud-mcp-dashboards \
+# Add sidecar discovery label
+kubectl label configmap nextcloud-mcp-dashboard \
  grafana_dashboard=1 \
  -n monitoring
-```

-Or add to your Helm values:
-
-```yaml
-# values.yaml for kube-prometheus-stack
-grafana:
-  dashboardProviders:
-    dashboardproviders.yaml:
-      apiVersion: 1
-      providers:
-        - name: 'nextcloud-mcp'
-          orgId: 1
-          folder: 'Nextcloud MCP'
-          type: file
-          disableDeletion: false
-          editable: true
-          options:
-            path: /var/lib/grafana/dashboards/nextcloud-mcp
-
-  dashboardsConfigMaps:
-    nextcloud-mcp: nextcloud-mcp-dashboards
+# Add folder annotation (annotations support spaces, unlike labels)
+kubectl annotate configmap nextcloud-mcp-dashboard \
+  grafana_folder="Nextcloud MCP" \
+  -n monitoring
 ```

 ## Dashboard Variables

-The dashboard includes two variables:
+The dashboard includes four template variables for dynamic filtering:

- **Data Source**: Select your Prometheus data source
- **Namespace**: Filter metrics by Kubernetes namespace
+- **datasource**: Select your Prometheus data source
+- **namespace**: Filter metrics by Kubernetes namespace (supports "All")
+- **pod**: Filter by specific pod(s) - multi-select enabled (supports "All")
+- **interval**: Query interval for rate calculations (1m, 5m, 10m, 30m, 1h - default: 5m)

 ## Customization

@@ -96,6 +96,30 @@ Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentic
   kubectl --namespace {{ .Release.Namespace }} exec -it deploy/{{ include "nextcloud-mcp-server.fullname" . }} -- curl -s http://localhost:{{ include "nextcloud-mcp-server.port" . }}/user/page | grep "Vector Sync"
 {{- end }}

+{{- if .Values.dashboards.enabled }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Enabled
+   - ConfigMap: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+   - Grafana Folder: {{ .Values.dashboards.grafanaFolder }}
+
+   The dashboard will be automatically imported by Grafana if the sidecar is configured
+   to watch for ConfigMaps with label "grafana_dashboard: 1".
+
+   To manually import the dashboard:
+   kubectl --namespace {{ .Release.Namespace }} get configmap {{ include "nextcloud-mcp-server.fullname" . }}-dashboard -o jsonpath='{.data.nextcloud-mcp-server\.json}' | jq . > dashboard.json
+
+   Then import dashboard.json via Grafana UI (Dashboards → Import).
+{{- else }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Disabled
+   - To enable automatic dashboard provisioning, set: dashboards.enabled=true
+
+   Manual import option:
+   The dashboard JSON is available in the chart at charts/nextcloud-mcp-server/dashboards/nextcloud-mcp-server.json
+{{- end }}
+
 For more information and documentation:
 - GitHub: https://github.com/cbcoutinho/nextcloud-mcp-server
 - Documentation: https://github.com/cbcoutinho/nextcloud-mcp-server#readme
@@ -0,0 +1,25 @@
+{{- if .Values.dashboards.enabled }}
+# Grafana dashboard ConfigMap; rendered only when dashboards.enabled is true.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+    {{- with .Values.dashboards.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana sidecar discovery label (always set, after any user-supplied labels)
+    grafana_dashboard: "1"
+  annotations:
+    {{- with .Values.dashboards.annotations }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana folder name (annotations support spaces, unlike labels);
+    # the annotation is emitted only when dashboards.grafanaFolder is non-empty
+    {{- if .Values.dashboards.grafanaFolder }}
+    grafana_folder: {{ .Values.dashboards.grafanaFolder | quote }}
+    {{- end }}
+data:
+  # Dashboard JSON is read verbatim from the chart's dashboards/ directory
+  nextcloud-mcp-server.json: |-
+{{ .Files.Get "dashboards/nextcloud-mcp-server.json" | indent 4 }}
+{{- end }}
@@ -218,8 +218,6 @@ spec:
            - name: METRICS_PORT
              value: {{ .Values.observability.metrics.port | quote }}
            {{- if .Values.observability.tracing.enabled }}
-            - name: OTEL_ENABLED
-              value: "true"
            - name: OTEL_EXPORTER_OTLP_ENDPOINT
              value: {{ .Values.observability.tracing.endpoint | quote }}
            - name: OTEL_SERVICE_NAME
@@ -205,6 +205,20 @@ prometheusRule:
  # Additional labels for PrometheusRule (e.g., for Prometheus selector)
  # Example: { prometheus: kube-prometheus }

+# Grafana dashboards (requires Grafana with sidecar enabled)
+dashboards:
+  # Enable automatic dashboard provisioning via ConfigMap
+  enabled: false
+  # Grafana folder name where dashboards will be imported
+  # The grafana-sidecar looks for ConfigMaps with label "grafana_dashboard: 1".
+  # The folder name is written to the "grafana_folder" annotation (supports spaces).
+  # NOTE(review): the sidecar presumably only reads it when its folderAnnotation
+  # setting points at "grafana_folder" - confirm against your Grafana chart values
+  grafanaFolder: "Nextcloud MCP"
+  # Additional labels for dashboard ConfigMap
+  # These will be added alongside the required "grafana_dashboard: 1" label
+  labels: {}
+  # Additional annotations for dashboard ConfigMap
+  annotations: {}
+
 service:
  type: ClusterIP
  port: 8000
@@ -0,0 +1,25 @@
+# CI-specific overrides for RAG evaluation pipeline
+# This file is used by the rag-evaluation.yml workflow to configure the MCP
+# container with OpenAI/GitHub Models API for vector embeddings.
+#
+# Usage:
+#   docker compose -f docker-compose.yml -f docker-compose.ci.yml up
+#
+# Environment variables (set in CI workflow):
+#   OPENAI_API_KEY - API key for embeddings (GitHub Models uses GITHUB_TOKEN)
+#   OPENAI_BASE_URL - API endpoint (e.g., https://models.github.ai/inference)
+#   OPENAI_EMBEDDING_MODEL - Model name (e.g., openai/text-embedding-3-small)
+#   OPENAI_GENERATION_MODEL - Model name for generation (e.g., openai/gpt-4o-mini)
+
+services:
+  mcp:
+    environment:
+      # OpenAI provider configuration (required for CI vector sync)
+      # OPENAI_API_KEY has no fallback here; the other three fall back to the
+      # GitHub Models endpoint and model names shown when unset or empty
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - OPENAI_BASE_URL=${OPENAI_BASE_URL:-https://models.github.ai/inference}
+      - OPENAI_EMBEDDING_MODEL=${OPENAI_EMBEDDING_MODEL:-openai/text-embedding-3-small}
+      - OPENAI_GENERATION_MODEL=${OPENAI_GENERATION_MODEL:-openai/gpt-4o-mini}
+      # Faster sync for CI: scan interval defaults to 5 (base compose file sets 60)
+      - VECTOR_SYNC_SCAN_INTERVAL=${VECTOR_SYNC_SCAN_INTERVAL:-5}
+      # Enable document processing for PDF parsing
+      - ENABLE_DOCUMENT_PROCESSING=true
@@ -3,7 +3,7 @@ services:
  # https://hub.docker.com/_/mariadb
  db:
    # Note: Check the recommend version here: https://docs.nextcloud.com/server/latest/admin_manual/installation/system_requirements.html#server
-    image: docker.io/library/mariadb:lts@sha256:ae6119716edac6998ae85508431b3d2e666530ddf4e94c61a10710caec9b0f71
+    image: docker.io/library/mariadb:lts@sha256:1cac8492bd78b1ec693238dc600be173397efd7b55eabc725abc281dc855b482
    restart: always
    command: --transaction-isolation=READ-COMMITTED
    volumes:
@@ -17,11 +17,11 @@ services:
  # Note: Redis is an external service. You can find more information about the configuration here:
  # https://hub.docker.com/_/redis
  redis:
-    image: docker.io/library/redis:alpine@sha256:28c9c4d7596949a24b183eaaab6455f8e5d55ecbf72d02ff5e2c17fe72671d31
+    image: docker.io/library/redis:alpine@sha256:6cbef353e480a8a6e7f10ec545f13d7d3fa85a212cdcc5ffaf5a1c818b9d3798
    restart: always

  app:
-    image: docker.io/library/nextcloud:32.0.1@sha256:5b043f7ea2f609d5ff5635f475c30d303bec17775a5c3f7fa435e3818e669120
+    image: docker.io/library/nextcloud:32.0.2@sha256:8cb1dc8c26944115469dd22f4965d2ed35bab9cf8c48d2bb052c8e9f83821ded
    restart: always
    ports:
      - 0.0.0.0:8080:80
@@ -34,7 +34,7 @@ services:
      - ./app-hooks:/docker-entrypoint-hooks.d:ro
      # Mount OIDC development directory outside /var/www/html to avoid rsync conflicts
      # The post-installation hook will register /opt/apps as an additional app directory
-      - ./third_party:/opt/apps:ro
+      #- ./third_party:/opt/apps:ro
    environment:
      - NEXTCLOUD_TRUSTED_DOMAINS=app
      - NEXTCLOUD_ADMIN_USER=admin
@@ -69,45 +69,51 @@ services:

  mcp:
    build: .
-    command: ["--transport", "streamable-http"]
    restart: always
+    command: ["--transport", "streamable-http"]
    depends_on:
      app:
        condition: service_healthy
    ports:
      - 127.0.0.1:8000:8000
+      - 127.0.0.1:9090:9090
    volumes:
      - mcp-data:/app/data
    environment:
      - NEXTCLOUD_HOST=http://app:80
      - NEXTCLOUD_USERNAME=admin
      - NEXTCLOUD_PASSWORD=admin
+      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080

      # Vector sync configuration (ADR-007)
      - VECTOR_SYNC_ENABLED=true
-      - VECTOR_SYNC_SCAN_INTERVAL=10
+      - VECTOR_SYNC_SCAN_INTERVAL=60
      - VECTOR_SYNC_PROCESSOR_WORKERS=1

-      - LOG_FORMAT=text
+      #- LOG_FORMAT=json

      # Qdrant configuration (three modes):
      # 1. Network mode: Set QDRANT_URL=http://qdrant:6333 (requires qdrant service)
      # 2. In-memory mode: Set QDRANT_LOCATION=:memory: (default if nothing set)
      # 3. Persistent local: Set QDRANT_LOCATION=/app/data/qdrant (stored in mcp-data volume)
-      - QDRANT_LOCATION=":memory:"  # In-memory mode for CI/testing (no external service required)
+      #- QDRANT_LOCATION=/app/data/qdrant  # In-memory mode used if not set
      #- QDRANT_URL=http://qdrant:6333  # Uncomment for network mode
      #- QDRANT_API_KEY=${QDRANT_API_KEY:-my_secret_api_key}  # Only for network mode

+      # Observability
+      #- OTEL_SERVICE_NAME=nextcloud-mcp-docker-compose
+      #- OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4317
+
      # Collection naming: Auto-generated as {deployment-id}-{model-name}
      # - Deployment ID: OTEL_SERVICE_NAME (if set) or hostname (fallback)
      # - Model name: OLLAMA_EMBEDDING_MODEL
      # - Example: "nextcloud-mcp-server-nomic-embed-text"
      # - Changing models creates new collection (requires re-embedding)
      # - Set QDRANT_COLLECTION to override auto-generation:
-      - QDRANT_COLLECTION=nextcloud_content
+      #- QDRANT_COLLECTION=nextcloud_content

      # Ollama configuration (optional - uses SimpleEmbeddingProvider if not set)
-      # - OLLAMA_BASE_URL=https://ollama.internal.coutinho.io:443
+      # - OLLAMA_BASE_URL=http://ollama:11434
      # - OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # Changing this creates new collection
      # - OLLAMA_VERIFY_SSL=false

@@ -152,7 +158,7 @@ services:
      - oauth-tokens:/app/data

  keycloak:
-    image: quay.io/keycloak/keycloak:26.4.4@sha256:c6459d5fae1b759f5d667ebdc6237ab3121379c3494e213898569014ede1846d
+    image: quay.io/keycloak/keycloak:26.4.7@sha256:9409c59bdfb65dbffa20b11e6f18b8abb9281d480c7ca402f51ed3d5977e6007
    command:
      - "start-dev"
      - "--import-realm"
@@ -189,8 +195,8 @@ services:
      # Provider auto-detected from OIDC_DISCOVERY_URL issuer
      # Using internal Docker hostname for discovery to get consistent issuer
      - OIDC_DISCOVERY_URL=http://keycloak:8080/realms/nextcloud-mcp/.well-known/openid-configuration
-      - OIDC_CLIENT_ID=nextcloud-mcp-server
-      - OIDC_CLIENT_SECRET=mcp-secret-change-in-production
+      - NEXTCLOUD_OIDC_CLIENT_ID=nextcloud-mcp-server
+      - NEXTCLOUD_OIDC_CLIENT_SECRET=mcp-secret-change-in-production
      - OIDC_JWKS_URI=http://keycloak:8080/realms/nextcloud-mcp/protocol/openid-connect/certs

      # Nextcloud API endpoint (for accessing APIs with validated token)
@@ -218,8 +224,28 @@ services:
      - keycloak-tokens:/app/data
      - keycloak-oauth-storage:/app/.oauth

+  # Smithery stateless deployment mode (ADR-016)
+  # Test with: docker compose --profile smithery up smithery
+  # Then: curl http://localhost:8081/.well-known/mcp-config
+  smithery:
+    build:
+      context: .
+      dockerfile: Dockerfile.smithery
+    restart: always
+    depends_on:
+      app:
+        condition: service_healthy
+    ports:
+      - 127.0.0.1:8081:8081
+    environment:
+      - SMITHERY_DEPLOYMENT=true
+      - VECTOR_SYNC_ENABLED=false
+      - PORT=8081
+    profiles:
+      - smithery
+
  qdrant:
-    image: qdrant/qdrant:v1.15.5
+    image: qdrant/qdrant:v1.16.2@sha256:dab6de32f7b2cc599985a7c764db3e8b062f70508fb85ca074aa856f829bf335
    restart: always
    ports:
      - 127.0.0.1:6333:6333  # REST API
@@ -377,7 +377,7 @@ async def get_vector_sync_status(ctx: Context) -> dict:
    }
 ```

-The web UI (`/user/page` route) mirrors these controls with a simple toggle switch for enabling/disabling sync and a status display showing indexed counts and sync state. There is no job history, no detailed progress bars, no per-document status—just the essential information users need.
+The web UI (`/app` route) mirrors these controls with a simple toggle switch for enabling/disabling sync and a status display showing indexed counts and sync state. There is no job history, no detailed progress bars, no per-document status—just the essential information users need.

 ### Authentication and Offline Access

@@ -0,0 +1,661 @@
+# ADR-010: Webhook-Based Vector Database Synchronization
+
+**Status**: Proposed
+**Date**: 2025-11-10
+**Depends On**: ADR-007 (Background Vector Sync)
+
+## Context
+
+ADR-007 established a background synchronization architecture for maintaining the vector database using periodic polling. The scanner task runs on a configurable interval (default 3600 seconds / 1 hour) to detect changed documents across Nextcloud apps. While this polling approach is simple and reliable, it introduces significant latency between content changes and vector database updates.
+
+### Current Polling Architecture
+
+The existing scanner implementation in `nextcloud_mcp_server/vector/scanner.py` operates as follows:
+
+1. **Periodic Scanning**: The scanner task sleeps for `vector_sync_scan_interval` seconds between runs
+2. **Change Detection**: For each scan, it:
+   - Fetches all documents from Nextcloud (notes, calendar events, etc.)
+   - Queries Qdrant for the last indexed timestamp of each document
+   - Compares modification timestamps to detect changes
+   - Queues changed documents for processing
+3. **Document Processing**: Processor tasks pull from the queue, generate embeddings, and update Qdrant
+
+This architecture works but has fundamental limitations:
+
+**Latency**: With a 1-hour scan interval, content changes can take up to 1 hour to appear in semantic search results. For time-sensitive use cases (e.g., "What's on my calendar today?"), this delay is problematic.
+
+**API Load**: Every scan fetches *all* documents for *all* enabled users, regardless of whether anything changed. For large deployments with thousands of documents, this generates significant unnecessary API traffic to Nextcloud.
+
+**Resource Waste**: The scanner and processors consume compute resources even when no content has changed. During periods of low activity, the system performs wasteful polling.
+
+**Scalability**: As the number of users and documents grows, the time required to complete a full scan increases. Eventually, the scan duration may exceed the scan interval, causing scans to run continuously without idle periods.
+
+**Rate Limiting**: Fetching all documents for all users in rapid succession can trigger Nextcloud's rate limiting, especially on shared hosting environments with restrictive API quotas.
+
+These limitations are inherent to any polling-based architecture. Reducing the scan interval (e.g., to 5 minutes) reduces latency but exacerbates API load, resource waste, and rate limiting issues. The fundamental problem is that the system has no way to know *when* content changes occur—it must repeatedly check to find out.
+
+### Nextcloud Webhook Listeners
+
+Nextcloud provides a webhook_listeners app (bundled with Nextcloud 30+) that enables push-based change notifications. Instead of polling for changes, external services can register webhook endpoints and receive HTTP POST requests when specific events occur. Administrators register these webhooks using Nextcloud's OCS API or occ commands.
+
+The webhook_listeners app supports events for all Nextcloud apps relevant to this MCP server's vector database:
+
+**Files/Notes Events** (notes are stored as files):
+- `OCP\Files\Events\Node\NodeCreatedEvent`
+- `OCP\Files\Events\Node\NodeWrittenEvent`
+- `OCP\Files\Events\Node\BeforeNodeDeletedEvent` ⭐ **Use this for deletion (includes node.id)**
+- `OCP\Files\Events\Node\NodeDeletedEvent` (missing node.id - file already deleted)
+- `OCP\Files\Events\Node\NodeRenamedEvent`
+- `OCP\Files\Events\Node\NodeCopiedEvent`
+
+**Calendar Events**:
+- `OCP\Calendar\Events\CalendarObjectCreatedEvent`
+- `OCP\Calendar\Events\CalendarObjectUpdatedEvent`
+- `OCP\Calendar\Events\CalendarObjectDeletedEvent`
+- `OCP\Calendar\Events\CalendarObjectMovedEvent`
+
+**Tables Events**:
+- `OCA\Tables\Event\RowAddedEvent`
+- `OCA\Tables\Event\RowUpdatedEvent`
+- `OCA\Tables\Event\RowDeletedEvent`
+
+**Deck Events** (via file events since cards are stored as files in some configurations)
+
+Each webhook notification includes rich metadata:
+- User ID who triggered the event
+- Timestamp of the event
+- Document ID and metadata
+- Operation type (create, update, delete)
+- Path information (for files)
+
+Webhook notifications are dispatched via background jobs, with configurable delivery guarantees. Administrators can set up dedicated webhook worker processes to achieve near-real-time delivery (within seconds of the triggering event).
+
+### Why Not Replace Polling Entirely?
+
+While webhooks provide superior latency and efficiency, they cannot fully replace polling:
+
+**Missed Events**: If the MCP server is down when a webhook fires, the notification is lost. Nextcloud's background job system processes webhooks asynchronously, but does not queue failed deliveries indefinitely.
+
+**Administrator Setup**: Webhooks must be registered by Nextcloud administrators using the OCS API or occ commands. This is an optional optimization that administrators can enable when they want to reduce polling frequency.
+
+**Filter Configuration**: Webhook filters must be carefully configured to avoid notification floods. A poorly configured filter could send thousands of notifications for bulk operations (e.g., importing a calendar with hundreds of events).
+
+**Graceful Degradation**: In environments where webhooks are not configured, the system continues using polling without any degradation in functionality.
+
+**Deletion Detection**: Nextcloud's webhook system does not guarantee delivery of deletion events if the user's account is removed or the app is uninstalled. Periodic polling provides a safety mechanism to detect orphaned documents.
+
+A complementary architecture where webhooks supplement (but don't replace) polling provides low-latency updates when configured, with polling ensuring reliability.
+
+### Design Considerations
+
+**Push vs Pull Trade-offs**:
+Webhooks introduce new failure modes (network issues, endpoint unavailability, notification floods) that polling avoids. The webhook endpoint must handle failures gracefully without blocking semantic search functionality.
+
+**Webhook Endpoint Security**:
+The MCP server exposes an HTTP endpoint to receive webhooks. Authentication is optional—in production deployments, administrators can configure Nextcloud to send an `Authorization` header that the MCP server validates. For local development, authentication can be disabled for simplicity.
+
+**Idempotency**:
+The system may receive duplicate notifications (webhook + next scan) or out-of-order notifications (update fires before create completes). Document processing must be idempotent—processing the same document multiple times produces the same result.
+
+**Asynchronous Processing**:
+Nextcloud processes webhooks via background jobs, introducing delivery latency (typically seconds to minutes depending on background job configuration). This affects testing strategies—integration tests cannot rely on immediate webhook delivery.
+
+**Deployment Patterns**:
+The MCP server webhook endpoint is accessible at the same host/port as the MCP server itself. Administrators configure Nextcloud to POST to `https://<mcp-server-host>:<port>/webhooks/nextcloud` when registering webhook listeners.
+
+## Decision
+
+We will add a webhook endpoint to the MCP server that receives change notifications from Nextcloud and queues documents for vector database processing. This complements the existing polling architecture from ADR-007 without replacing it—webhooks provide low-latency updates when configured, while polling ensures reliability regardless of webhook availability.
+
+The architecture is intentionally simple: the webhook endpoint is just another producer of `DocumentTask` objects that feed into the existing processor queue. The scanner task, processor pool, and queue management remain unchanged from ADR-007.
+
+### Architecture Components
+
+**1. Webhook Endpoint**
+
+A new Starlette HTTP route will be added to receive webhook notifications from Nextcloud:
+
+```python
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+@app.route("/webhooks/nextcloud", methods=["POST"])
+async def handle_nextcloud_webhook(request: Request) -> JSONResponse:
+    """
+    Receive webhook notifications from Nextcloud.
+
+    Parses event payload, extracts document metadata, and queues
+    changed documents for processing using the same queue as the scanner.
+    """
+    # 1. Optional authentication validation
+    if settings.webhook_secret:
+        auth_header = request.headers.get("authorization", "")
+        if not auth_header.startswith("Bearer ") or \
+           auth_header[7:] != settings.webhook_secret:
+            logger.warning("Webhook authentication failed")
+            return JSONResponse(
+                {"status": "error", "message": "Unauthorized"},
+                status_code=401
+            )
+
+    # 2. Parse webhook payload
+    payload = await request.json()
+    event_class = payload["event"]["class"]
+    user_id = payload["user"]["uid"]
+
+    # 3. Extract document metadata from event
+    doc_task = extract_document_task(event_class, payload)
+    if not doc_task:
+        return JSONResponse({"status": "ignored", "reason": "unsupported event"})
+
+    # 4. Send to processor queue (same queue as scanner)
+    try:
+        await webhook_send_stream.send(doc_task)
+        logger.info(f"Queued document from webhook: {doc_task}")
+        return JSONResponse({"status": "queued"})
+    except Exception as e:
+        logger.error(f"Failed to queue webhook document: {e}")
+        return JSONResponse(
+            {"status": "error", "message": str(e)},
+            status_code=500
+        )
+```
+
+The endpoint:
+- Validates optional authentication via `Authorization: Bearer <secret>` header
+- Parses various event types (calendar, files, tables) into `DocumentTask` objects
+- Sends to the same processing queue that the scanner uses
+- Returns quickly (<50ms) to avoid blocking Nextcloud's webhook workers
+- Handles errors gracefully (invalid payload, queue full, etc.)
+
+**2. Webhook Registration Helper (Development Only)**
+
+For development and testing purposes, a helper method will be added to `NextcloudClient` for registering webhooks via the OCS API. This is NOT exposed as an MCP tool—administrators register webhooks manually using Nextcloud's admin interface or the OCS API directly.
+
+```python
+class NextcloudClient:
+    async def register_webhook(
+        self,
+        event_type: str,
+        uri: str,
+        http_method: str = "POST",
+        auth_method: str = "none",
+        headers: dict[str, str] | None = None,
+    ) -> dict:
+        """
+        Register a webhook with Nextcloud (requires admin credentials).
+
+        Used for development/testing. Production admins should register
+        webhooks using Nextcloud's admin UI or occ commands.
+        """
+        # Implementation uses OCS API: POST /ocs/v2.php/apps/webhook_listeners/api/v1/webhooks
+        ...
+```
+
+This keeps webhook registration out of the MCP tool surface while providing a convenient API for integration tests.
+
+**3. Event Parsing**
+
+A helper function extracts `DocumentTask` from various Nextcloud event types:
+
+```python
+def extract_document_task(event_class: str, payload: dict) -> DocumentTask | None:
+    """Extract DocumentTask from webhook event payload."""
+    user_id = payload["user"]["uid"]
+    event_data = payload["event"]
+
+    # File/Note events
+    if "NodeCreatedEvent" in event_class or "NodeWrittenEvent" in event_class:
+        # Only process markdown files (notes)
+        path = event_data["node"]["path"]
+        if not path.endswith(".md"):
+            return None
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=event_data["node"]["id"],
+            doc_type="note",
+            operation="index",
+            modified_at=payload["time"],
+        )
+
+    # Calendar events
+    elif "CalendarObjectCreatedEvent" in event_class or \
+         "CalendarObjectUpdatedEvent" in event_class:
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=str(event_data["objectData"]["id"]),
+            doc_type="calendar_event",
+            operation="index",
+            modified_at=event_data["objectData"]["lastmodified"],
+        )
+
+    # Deletion events (use BeforeNodeDeletedEvent for files to get node.id)
+    elif "BeforeNodeDeletedEvent" in event_class or \
+         "NodeDeletedEvent" in event_class or \
+         "CalendarObjectDeletedEvent" in event_class:
+        # Similar logic for delete operations
+        ...
+
+    return None  # Unsupported event type
+```
+
+**4. No Changes to Scanner or Processors**
+
+The existing scanner task from ADR-007 continues operating unchanged. It polls Nextcloud on its configured interval (`VECTOR_SYNC_SCAN_INTERVAL`), discovers changed documents, and queues them for processing. The scanner is unaware of webhooks—it simply adds `DocumentTask` objects to the queue.
+
+Similarly, the processor pool continues pulling `DocumentTask` objects from the queue, generating embeddings, and updating Qdrant. Processors don't know or care whether a task came from the scanner or a webhook.
+
+This design keeps concerns separated: webhooks and scanner are independent producers, processors are independent consumers, and the queue mediates between them.
+
+### Configuration
+
+A new optional environment variable controls webhook authentication:
+
+```bash
+# Optional: Shared secret for webhook authentication
+# If set, webhooks must include "Authorization: Bearer <secret>" header
+# If unset, no authentication is required (useful for local development)
+WEBHOOK_SECRET=<generate-random-secret>
+```
+
+The webhook endpoint is automatically available at `/webhooks/nextcloud` when the MCP server starts. No feature flags or additional configuration needed—if Nextcloud sends webhooks to this endpoint, they will be processed.
+
+**Reducing Polling Frequency**: Administrators who configure webhooks may want to reduce polling frequency to minimize API load while maintaining safety reconciliation scans:
+
+```bash
+# Increase scan interval from 1 hour (default) to 24 hours
+VECTOR_SYNC_SCAN_INTERVAL=86400
+```
+
+This is a manual configuration decision, not automatic—the scanner doesn't adapt based on webhook availability.
+
+### Webhook Event Mapping
+
+The webhook handler maps Nextcloud events to document types:
+
+| Nextcloud Event | Document Type | Operation |
+|----------------|---------------|-----------|
+| `NodeCreatedEvent` (path: `*/files/*.md`) | `note` | `index` |
+| `NodeWrittenEvent` (path: `*/files/*.md`) | `note` | `index` |
+| `BeforeNodeDeletedEvent` (path: `*/files/*.md`) | `note` | `delete` |
+| `CalendarObjectCreatedEvent` | `calendar_event` | `index` |
+| `CalendarObjectUpdatedEvent` | `calendar_event` | `index` |
+| `CalendarObjectDeletedEvent` | `calendar_event` | `delete` |
+| `RowAddedEvent` | `table_row` | `index` |
+| `RowUpdatedEvent` | `table_row` | `index` |
+| `RowDeletedEvent` | `table_row` | `delete` |
+
+Path filters in webhook registration ensure only relevant files trigger notifications (e.g., exclude `.jpg`, `.mp4` for file events).
+
+### Administrator Setup
+
+Administrators who want to enable webhooks:
+
+1. **Enable webhook_listeners app** in Nextcloud: `occ app:enable webhook_listeners`
+2. **Register webhook endpoints** using Nextcloud's OCS API or admin UI:
+   - Endpoint: `https://<mcp-server-host>:<port>/webhooks/nextcloud`
+   - Events: File created/updated/deleted, Calendar object events, Table row events
+   - Filters: Exclude non-content files (images, videos), system directories
+   - Optional: Configure `Authorization: Bearer <WEBHOOK_SECRET>` header
+3. **Optionally reduce scanner frequency**: Set `VECTOR_SYNC_SCAN_INTERVAL=86400` (24 hours)
+4. **Set up webhook workers** (optional): Configure dedicated background job workers for low-latency delivery
+
+Existing deployments continue using polling without any changes. Webhooks are purely additive.
+
+## Consequences
+
+### Benefits
+
+**Reduced Latency**: With webhooks configured, content changes appear in semantic search within seconds to minutes (depending on Nextcloud background job configuration) instead of up to 1 hour. Queries like "What meetings do I have today?" reflect recent calendar updates.
+
+**Lower API Load**: Administrators who configure webhooks can reduce scanner frequency (e.g., 24-hour intervals), eliminating most polling API calls while maintaining safety reconciliation scans. This significantly reduces load on Nextcloud servers.
+
+**Better Scalability**: Webhooks scale better than polling as content volume grows. The system only processes changed documents instead of checking all documents every hour.
+
+**Simple Architecture**: The webhook endpoint is just another producer feeding the existing processor queue. No changes to scanner, processors, or queue management—webhooks integrate cleanly into the existing architecture.
+
+**Improved User Experience**: Lower-latency semantic search feels more responsive and accurate, especially for time-sensitive queries about recent changes.
+
+### Drawbacks
+
+**Manual Configuration**: Administrators must configure webhooks outside the MCP server using Nextcloud's admin tools. This adds setup complexity compared to the zero-configuration polling approach.
+
+**Deployment Requirements**: Webhooks require the MCP server to be reachable from Nextcloud via HTTP(S). Deployments behind NAT or with restrictive firewalls may not support webhooks without additional networking configuration.
+
+**Asynchronous Delivery**: Nextcloud processes webhooks via background jobs, introducing delivery latency (typically seconds to minutes). The exact latency depends on background job worker configuration and system load.
+
+**Testing Complexity**: Integration tests cannot rely on immediate webhook delivery due to asynchronous background job processing. Tests must either poll for results or mock webhook delivery directly.
+
+**New Failure Modes**: Webhook endpoint downtime, network issues between Nextcloud and MCP server, webhook notification floods from bulk operations. The system must handle these gracefully.
+
+**Version Dependencies**: The webhook_listeners app requires Nextcloud 30+. Older versions continue using polling exclusively.
+
+### Monitoring and Observability
+
+New metrics track webhook performance:
+
+- `webhook_notifications_received_total{event_type}`: Count of webhook notifications by event type
+- `webhook_processing_duration_seconds{event_type}`: Webhook handler latency
+- `webhook_errors_total{error_type}`: Failed webhook processing by error type (auth failure, parse error, queue full)
+
+Logs include:
+- Successful webhook processing: `Queued document from webhook: DocumentTask(...)`
+- Webhook authentication failures: `Webhook authentication failed`
+- Parse errors: `Failed to parse webhook payload: ...`
+- Unsupported events: `Ignoring webhook for unsupported event: ...`
+
+### Security Considerations
+
+**Optional Authentication**: When `WEBHOOK_SECRET` is configured, webhook requests must include `Authorization: Bearer <WEBHOOK_SECRET>` header. The server validates this before processing to prevent unauthorized document queueing. For local development, authentication can be disabled by leaving `WEBHOOK_SECRET` unset.
+
+**Payload Validation**: Webhook payloads are parsed and validated against expected schemas. Malformed payloads are rejected with 400 Bad Request responses.
+
+**No Scope Enforcement**: Unlike MCP tools, webhooks do not enforce progressive consent or check if users have enabled semantic search. Webhooks queue all document changes—administrators control which events trigger webhooks via Nextcloud filters. This keeps the webhook endpoint simple and stateless.
+
+### Testing Strategy
+
+**Unit Tests**: Test webhook handler logic, event parsing, and authentication validation using mocked payloads:
+
+```python
+async def test_webhook_endpoint_parses_note_created_event():
+    """Unit test: webhook endpoint extracts DocumentTask from note created event."""
+    payload = {
+        "user": {"uid": "alice"},
+        "time": 1704067200,
+        "event": {
+            "class": "OCP\\Files\\Events\\Node\\NodeCreatedEvent",
+            "node": {"id": "123", "path": "/alice/files/test.md"}
+        }
+    }
+    # Mock send_stream and verify DocumentTask is queued
+    ...
+```
+
+**Integration Tests (Without Real Webhooks)**: Since Nextcloud processes webhooks asynchronously via background jobs, integration tests should NOT rely on triggering real Nextcloud events and waiting for webhook delivery. Instead, tests should:
+
+1. **Mock webhook delivery**: POST webhook payloads directly to the `/webhooks/nextcloud` endpoint
+2. **Verify processing**: Check that documents are queued and eventually appear in Qdrant
+3. **Test authentication**: Verify requests without valid auth header are rejected (when `WEBHOOK_SECRET` is set)
+
+```python
+async def test_webhook_integration_mocked_delivery():
+    """Integration test: webhook handler queues document for processing."""
+    # POST webhook payload directly to endpoint (bypass Nextcloud)
+    response = await client.post("/webhooks/nextcloud", json=note_created_payload)
+    assert response.status_code == 200
+
+    # Wait for processor to handle document
+    await asyncio.sleep(2)
+
+    # Verify document appears in Qdrant
+    results = await qdrant_client.scroll(...)
+    assert len(results[0]) > 0
+```
+
+**Manual Testing (Real Webhooks)**: For end-to-end validation with real Nextcloud webhook delivery:
+
+1. Register webhook via OCS API or `NextcloudClient.register_webhook()` helper
+2. Configure webhook background job workers for low-latency delivery
+3. Trigger Nextcloud events (create note, add calendar event)
+4. Monitor MCP server logs for webhook delivery
+5. Verify documents appear in Qdrant after background job processing
+
+**Failure Mode Tests**:
+- Invalid authentication: Verify 401 response when auth header is missing/incorrect
+- Malformed payload: Verify 400 response for invalid JSON or missing required fields
+- Unsupported event types: Verify graceful handling (ignored, not error)
+- Queue full: Verify 500 response with appropriate error message
+
+### Future Enhancements
+
+**Batch Processing**: Group multiple webhook notifications within a short time window (e.g., 5 seconds) into a single batch before queueing. This reduces processor overhead during bulk operations like importing calendars.
+
+**Webhook Payload Optimization**: For large documents, Nextcloud could be configured to send minimal metadata in webhooks (just user_id, doc_id, doc_type), with processors fetching full content lazily. This reduces webhook payload size and network bandwidth.
+
+**Deduplication Window**: Track recently processed documents (last 5 minutes) to avoid redundant work when webhooks and scanner both detect the same change. The processor can check a simple in-memory cache before fetching document content.
+
+## Appendix A: Manual Webhook Testing Results (2025-11-11)
+
+### Testing Summary
+
+Manual validation of Nextcloud webhook schemas and behavior confirmed that webhooks work as documented with several important findings for implementation. **5 out of 6** webhook types were successfully captured and validated.
+
+**Test Environment:**
+- Nextcloud 30+ (Docker compose)
+- webhook_listeners app enabled
+- Test endpoint: `http://mcp:8000/webhooks/nextcloud`
+- Background webhook worker running (60s timeout)
+
+**Results:**
+- ✅ NodeCreatedEvent (file creation)
+- ✅ NodeWrittenEvent (file update)
+- ✅ NodeDeletedEvent (file deletion)
+- ✅ CalendarObjectCreatedEvent
+- ✅ CalendarObjectUpdatedEvent
+- ❌ CalendarObjectDeletedEvent (webhook did not fire - potential Nextcloud bug)
+
+### Critical Implementation Findings
+
+#### 1. Deletion Events Lack `node.id` Field
+
+**Finding:** `NodeDeletedEvent` payloads do NOT include `event.node.id`, only `event.node.path`.
+
+**Example:**
+```json
+{
+  "user": {"uid": "admin", "displayName": "admin"},
+  "time": 1762851093,
+  "event": {
+    "class": "OCP\\Files\\Events\\Node\\NodeDeletedEvent",
+    "node": {
+      "path": "/admin/files/Notes/Webhooks/Webhook Test Note.md"
+      // NOTE: No "id" field present
+    }
+  }
+}
+```
+
+**Impact:** The event parser in this ADR's example code assumes `event_data["node"]["id"]` exists for all file events. This will fail for deletions.
+
+**Update (2025-11-11):** Nextcloud maintainer clarified that `BeforeNodeDeletedEvent` should be used instead of `NodeDeletedEvent` to access `node.id` before the file is deleted. See [issue #56371](https://github.com/nextcloud/server/issues/56371#issuecomment-2470896634).
+
+> "Try using the `BeforeNodeDeletedEvent`. The `id` should still be available at that time. The reason `id` is not in `NodeDeletedEvent` is because the file is effectively guaranteed to be gone and, in turn, so is the FileInfo."
+> — Josh Richards, Nextcloud maintainer
+
+**Recommended Solution:** Use `OCP\Files\Events\Node\BeforeNodeDeletedEvent` for file deletion webhooks instead of `NodeDeletedEvent`.
+
+**Alternative Fix (if using NodeDeletedEvent):** Check for `id` existence and fall back to path-based identification:
+
+```python
+def extract_document_task(event_class: str, payload: dict) -> DocumentTask | None:
+    user_id = payload["user"]["uid"]
+    event_data = payload["event"]
+
+    # File deletion events - NO node.id field
+    if "NodeDeletedEvent" in event_class:
+        path = event_data["node"]["path"]
+        if not path.endswith(".md"):
+            return None
+        # Use path-based ID since node.id is unavailable
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=f"path:{path}",  # Prefix to distinguish from numeric IDs
+            doc_type="note",
+            operation="delete",
+            modified_at=payload["time"],
+        )
+
+    # File creation/update events - node.id exists
+    elif "NodeCreatedEvent" in event_class or "NodeWrittenEvent" in event_class:
+        path = event_data["node"]["path"]
+        if not path.endswith(".md"):
+            return None
+
+        # Check if 'id' exists (should, but be defensive)
+        node_id = event_data["node"].get("id")
+        if not node_id:
+            # Fallback for missing ID
+            node_id = f"path:{path}"
+
+        return DocumentTask(
+            user_id=user_id,
+            doc_id=str(node_id),
+            doc_type="note",
+            operation="index",
+            modified_at=payload["time"],
+        )
+```
+
+**Qdrant Deletion Strategy:** When deleting by path-based ID, search Qdrant for documents with matching path metadata:
+
+```python
+async def delete_document_by_path(user_id: str, path: str):
+    """Delete document from Qdrant using path (when ID unavailable)."""
+    points = await qdrant.scroll(
+        collection_name=collection,
+        scroll_filter=Filter(must=[
+            FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+            FieldCondition(key="metadata.path", match=MatchValue(value=path)),
+        ]),
+    )
+    # Delete found points...
+```
+
+#### 2. Multiple Webhooks Per Operation
+
+**Finding:** Creating a single note triggers 3-5 separate webhook events in rapid succession:
+
+1. `NodeCreatedEvent` for parent folder (if new)
+2. `NodeWrittenEvent` for parent folder
+3. `NodeCreatedEvent` for the note file
+4. `NodeWrittenEvent` for the note file (sometimes fires twice)
+
+**Impact:** Without deduplication, the processor will fetch and index the same note multiple times within seconds, wasting compute and API quota.
+
+**Solution:** The processor queue should be idempotent. If the same document is queued multiple times, only the latest version needs processing. Implementation options:
+
+1. **Queue-level deduplication:** Before adding to queue, check if a task for the same `(user_id, doc_id)` is already pending. Replace the existing task instead of adding duplicate.
+
+2. **Processor-level deduplication:** Track recently processed documents in a short-lived cache (5 minutes). If a document was just processed, skip redundant fetch unless the `modified_at` timestamp is newer.
+
+3. **Accept duplicates:** Let the processor handle duplicates naturally. Qdrant upserts are idempotent—reindexing with identical content is harmless but wasteful.
+
+**Recommendation:** Implement queue-level deduplication by maintaining a map of pending tasks and replacing duplicates with newer timestamps.
+
+#### 3. Type Discrepancy in `node.id`
+
+**Finding:** Nextcloud documentation specifies `node.id` as type `string`, but actual payloads return `int`:
+
+```json
+"node": {
+  "id": 437,  // integer, not "437"
+  "path": "/admin/files/Notes/Webhooks/Webhook Test Note.md"
+}
+```
+
+**Impact:** Code that assumes `node.id` is a string may fail or misbehave on the integer value—for example, when calling string methods on it or comparing it with string IDs stored elsewhere.
+
+**Solution:** Explicitly convert to string when extracting: `doc_id=str(event_data["node"]["id"])`
+
+#### 4. Calendar Events Have Different ID Field Path
+
+**Finding:** Calendar events store the document ID in a different location than file events:
+
+- **File events:** `event.node.id`
+- **Calendar events:** `event.objectData.id`
+
+**Impact:** Event parser must handle different field paths for different event types. The example code in this ADR correctly shows this difference.
+
+**Calendar Event Deletion:** Calendar deletion webhooks did NOT fire during testing. This may be a Nextcloud bug or require specific configuration (e.g., trash bin enabled). Until resolved, calendar deletions will only be detected via periodic scanner runs.
+
+#### 5. Rich Metadata in Calendar Webhooks
+
+**Finding:** Calendar webhook payloads include extensive metadata not present in file webhooks:
+
+```json
+{
+  "event": {
+    "calendarId": 1,
+    "calendarData": {
+      "id": 1,
+      "uri": "personal",
+      "{http://calendarserver.org/ns/}getctag": "...",
+      "{http://sabredav.org/ns}sync-token": 21,
+      // ... many calendar-level properties
+    },
+    "objectData": {
+      "id": 3,
+      "uri": "webhook-test-event-001.ics",
+      "lastmodified": 1762851169,
+      "etag": "\"2b937b7d77dc83c77329dfdb210ba9d0\"",
+      "calendarid": 1,
+      "size": 297,
+      "component": "vevent",
+      "classification": 0,
+      "uid": "webhook-test-event-001@nextcloud",
+      "calendardata": "BEGIN:VCALENDAR\r\nVERSION:2.0\r\n...",  // Full iCal
+      "{http://nextcloud.com/ns}deleted-at": null
+    },
+    "shares": []  // Array of sharing info
+  }
+}
+```
+
+**Opportunity:** The full iCal content is available in `objectData.calendardata`. The processor could extract metadata directly from the webhook payload instead of making an additional CalDAV request, reducing API load.
+
+### Updated Event Mapping
+
+Based on testing, the actual webhook behavior:
+
+| Nextcloud Event | Fires? | `node.id`/`objectData.id` Present? | Notes |
+|----------------|--------|-------------------------------------|-------|
+| `NodeCreatedEvent` | ✅ Yes | ✅ Yes (`int`) | Fires for folders too |
+| `NodeWrittenEvent` | ✅ Yes | ✅ Yes (`int`) | Fires 1-2x per operation |
+| `NodeDeletedEvent` | ✅ Yes | ❌ **NO** (only `path`) | Critical difference |
+| `CalendarObjectCreatedEvent` | ✅ Yes | ✅ Yes (`objectData.id`) | Full iCal included |
+| `CalendarObjectUpdatedEvent` | ✅ Yes | ✅ Yes (`objectData.id`) | Full iCal included |
+| `CalendarObjectDeletedEvent` | ❌ **DID NOT FIRE** | ❓ Unknown | Possible Nextcloud bug |
+
+### Recommended Implementation Changes
+
+The webhook handler code in this ADR requires these modifications:
+
+1. **Handle missing `node.id` in deletions** (see code example in Finding #1)
+2. **Add deduplication logic** to prevent redundant processing from multiple webhooks per operation
+3. **Validate field existence** before accessing nested properties (`get()` with defaults)
+4. **Log unsupported events** at DEBUG level (not WARNING) to avoid log noise
+5. **Add calendar deletion fallback:** Since the calendar deletion webhook is unreliable, calendar deletions must rely on scanner reconciliation
+6. **Consider payload optimization:** Extract calendar metadata from webhook payload to reduce CalDAV API calls
+
+### Testing Implications
+
+**Integration Test Strategy:**
+
+The asynchronous nature of Nextcloud webhooks makes real webhook delivery unreliable for automated tests:
+
+- ✅ **DO:** POST webhook payloads directly to `/webhooks/nextcloud` endpoint in tests
+- ❌ **DON'T:** Trigger Nextcloud events and wait for webhook delivery
+- ✅ **DO:** Test authentication, payload parsing, and queue integration with mocked payloads
+- ❌ **DON'T:** Assume webhooks fire immediately or reliably
+
+**Manual Testing Required:**
+- Real webhook delivery latency (depends on background job workers)
+- Calendar deletion webhook behavior (confirm bug or configuration issue)
+- Behavior under high-frequency updates (bulk operations)
+- Network failure handling (Nextcloud can't reach MCP server)
+
+### Complete Tested Payload Examples
+
+See `webhook-testing-findings.md` in the repository root for:
+- Complete JSON payloads for all tested events
+- Detailed schema validation results
+- Additional edge cases and observations
+- Screenshots of webhook logs
+
+## References
+
+- ADR-007: Background Vector Database Synchronization (polling architecture)
+- Nextcloud Documentation: Webhook Listeners admin manual (`admin_manual/webhook_listeners/index.rst` in the `nextcloud/documentation` repository)
+- Nextcloud OCS API: Webhook registration endpoint
+- Current scanner implementation: `nextcloud_mcp_server/vector/scanner.py:37`
+- Webhook Testing Report: `webhook-testing-findings.md` (2025-11-11)
@@ -0,0 +1,943 @@
+# ADR-011: Improving Semantic Search Quality Through Better Chunking and Embeddings
+
+**Status**: Partially Implemented (Chunking Complete, Embeddings Pending)
+**Date**: 2025-11-12
+**Implementation Date**: 2025-11-18 (Chunking)
+**Authors**: Development Team
+**Related**: ADR-003 (Vector Database Architecture), ADR-008 (MCP Sampling for RAG)
+
+## Context
+
+The semantic search implementation provides document retrieval across Nextcloud apps using vector embeddings. Production usage has revealed that **the system frequently misses relevant documents** (recall problem).
+
+Root cause analysis identifies two fundamental issues:
+
+### 1. Poor Chunking Strategy
+
+**Current Implementation** (`nextcloud_mcp_server/vector/document_chunker.py:36`):
+```python
+words = content.split()  # Naive whitespace splitting
+chunk_size = 512  # words
+overlap = 50  # words
+chunks = [words[i:i+chunk_size] for i in range(0, len(words), chunk_size-overlap)]
+```
+
+**Problems**:
+- **Breaks semantic boundaries**: Splits mid-sentence, mid-paragraph, mid-thought
+- **Loses context**: "The meeting discussed budget. We decided to..." becomes two disconnected chunks
+- **Poor retrieval**: Relevant content split across chunks with low individual relevance scores
+- **No structure awareness**: Ignores markdown headers, lists, code blocks
+
+**Evidence**:
+- Documents with relevant content in middle sections score poorly (content split across 3+ chunks)
+- Multi-sentence concepts (spanning 60-100 words) are fragmented
+- Search for "budget planning process" misses documents where these words appear in adjacent sentences but different chunks
+
+### 2. Suboptimal Embedding Model
+
+**Current Implementation** (`nextcloud_mcp_server/embedding/ollama_provider.py:33`):
+```python
+_model = "nomic-embed-text"  # 768 dimensions
+_dimension = 768  # Hardcoded
+```
+
+**Problems**:
+- **Model selection**: `nomic-embed-text` is general-purpose, not optimized for our use case
+- **No benchmarking**: Selected without comparative evaluation
+- **Dimensionality**: 768-dim may be insufficient for nuanced semantic distinctions
+- **No domain adaptation**: Model not tuned for Nextcloud content (notes, calendar, deck cards)
+
+**Evidence**:
+- Synonymous queries return different results ("meeting notes" vs. "discussion summary")
+- Domain-specific terms poorly represented ("standup", "retrospective", "OKRs")
+- Cross-lingual content (if present) not well supported
+
+### Current Performance
+
+**Baseline Metrics** (100-document test corpus, 50 queries):
+- **Recall@10**: ~52% (misses 48% of relevant documents)
+- **Precision@10**: ~78% (acceptable but room for improvement)
+- **MRR**: 0.58 (relevant docs often not in top positions)
+- **Zero-result queries**: 18% (completely missing relevant content)
+
+## Decision Drivers
+
+1. **Address Root Causes**: Fix fundamental issues (chunking, embeddings) before adding complexity (reranking, hybrid search)
+2. **Measurable Impact**: Target 40-60% improvement in recall through chunking/embedding alone
+3. **Independence**: Improvements should be orthogonal to future enhancements (reranking, GraphRAG)
+4. **Cost Efficiency**: Minimize infrastructure and API costs
+5. **Reindexing Acceptable**: One-time reindex cost justified by long-term quality improvement
+
+## Options Considered
+
+### Chunking Strategies
+
+#### Option C1: Semantic Sentence-Aware Chunking (RECOMMENDED)
+
+**Description**: Respect sentence boundaries while maintaining target chunk size
+
+**Implementation**:
+```python
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+splitter = RecursiveCharacterTextSplitter(
+    chunk_size=2048,  # ~512 words in characters
+    chunk_overlap=200,  # ~50 words in characters
+    separators=["\n\n", "\n", ". ", "! ", "? ", "; ", ": ", ", ", " "],
+    length_function=len,
+)
+```
+
+**How it works**:
+1. Try splitting by paragraphs (`\n\n`)
+2. If chunks too large, split by lines (`\n`)
+3. If still too large, split by sentences (`. `, `! `, `? `)
+4. If still too large, split by clauses and phrases (`; `, `: `, `, `)
+5. Last resort: split by words (` `)
+
+**Pros**:
+- ✅ Preserves semantic boundaries (breaks mid-sentence only when a single sentence exceeds the chunk size)
+- ✅ Maintains context coherence within chunks
+- ✅ Simple implementation (langchain library)
+- ✅ Configurable separators for different content types
+- ✅ Proven approach (used by major RAG systems)
+
+**Cons**:
+- ❌ Variable chunk sizes (not exactly 512 words, but close)
+- ❌ Adds dependency (langchain)
+- ❌ Slightly slower than naive splitting (~10-20ms per document)
+
+**Expected Impact**: 20-30% recall improvement
+
+#### Option C2: Hierarchical Context-Preserving Chunks
+
+**Description**: Create overlapping parent/child chunks
+
+**Structure**:
+```
+Document → Large parent chunks (1024 words) → Small child chunks (256 words)
+          ↓                                    ↓
+   Stored in Qdrant                       Searched first
+                                          Return parent context
+```
+
+**Implementation**:
+```python
+# Generate child chunks (searched)
+child_chunks = splitter.split_text(content, chunk_size=1024)
+
+# Generate parent chunks (context)
+parent_chunks = splitter.split_text(content, chunk_size=4096)
+
+# Store both with parent-child relationships
+for child_idx, child in enumerate(child_chunks):
+    parent_idx = find_parent(child_idx)
+    store_vector(
+        vector=embed(child),
+        payload={
+            "chunk": child,
+            "parent_chunk": parent_chunks[parent_idx],
+            "chunk_type": "child"
+        }
+    )
+```
+
+**Pros**:
+- ✅ Best of both worlds: precise matching + full context
+- ✅ Handles multi-hop information needs
+- ✅ Better for long documents (> 1000 words)
+
+**Cons**:
+- ❌ 2x storage (parent + child chunks)
+- ❌ More complex implementation
+- ❌ Higher indexing time (embed twice)
+- ❌ Query complexity (retrieve child, return parent)
+
+**Expected Impact**: 35-45% recall improvement (diminishing returns vs. complexity)
+
+**Verdict**: ⚠️ Consider only if Option C1 insufficient
+
+#### Option C3: Document Structure-Aware Chunking
+
+**Description**: Parse markdown/document structure before chunking
+
+**Implementation**:
+```python
+import mistune  # Markdown parser
+
+def structure_aware_chunk(markdown_content: str) -> list[str]:
+    ast = mistune.create_markdown(renderer='ast')(markdown_content)
+
+    chunks = []
+    for node in ast:
+        if node['type'] == 'heading':
+            # Start new chunk at each header
+            current_chunk = node['children'][0]['raw']
+        elif node['type'] == 'paragraph':
+            current_chunk += "\n" + node['children'][0]['raw']
+            if len(current_chunk) > 2048:
+                chunks.append(current_chunk)
+                current_chunk = ""
+
+    return chunks
+```
+
+**Pros**:
+- ✅ Respects document logical structure
+- ✅ Headers provide context for chunks
+- ✅ Works well for structured notes (documentation, meeting notes with sections)
+
+**Cons**:
+- ❌ Complex implementation (parser, AST traversal)
+- ❌ Markdown-specific (doesn't help calendar events, deck cards)
+- ❌ Variable chunk sizes (some sections very short/long)
+- ❌ Breaks for unstructured content
+
+**Expected Impact**: 15-25% improvement for structured content only
+
+**Verdict**: ⚠️ Future enhancement after Option C1
+
+#### Option C4: Fixed Sliding Window (Current Baseline)
+
+**Description**: Current naive word-based splitting
+
+**Verdict**: ❌ Superseded by Option C1
+
+### Embedding Model Strategies
+
+#### Option E1: Upgrade to Better General-Purpose Model (RECOMMENDED)
+
+**Description**: Switch to state-of-the-art embedding model
+
+**Candidates**:
+
+| Model | Dimensions | MTEB Score | Pros | Cons |
+|-------|-----------|------------|------|------|
+| **mxbai-embed-large** | 1024 | 64.68 | Best performance, good balance | Larger (slower) |
+| **nomic-embed-text-v1.5** | 768 | 62.39 | Upgraded version of current | Incremental improvement |
+| **bge-large-en-v1.5** | 1024 | 64.23 | Excellent for English | Not multilingual |
+| **nomic-embed-text** (current) | 768 | 60.10 | Baseline | Lower performance |
+
+**MTEB**: Massive Text Embedding Benchmark (higher = better semantic understanding)
+
+**Recommendation**: **mxbai-embed-large-v1**
+- Best MTEB score (64.68)
+- 1024 dimensions (richer semantic space)
+- Works well via Ollama
+- ~15-20% better retrieval quality in benchmarks
+
+**Implementation**:
+```python
+# config.py
+OLLAMA_EMBEDDING_MODEL = "mxbai-embed-large-v1"  # Changed from nomic-embed-text
+
+# ollama_provider.py
+async def get_dimension(self) -> int:
+    # Query Ollama for actual dimension instead of hardcoding
+    response = await self.client.post("/api/show", json={"name": self.model})
+    return response.json()["details"]["embedding_length"]
+```
+
+**Migration**:
+1. Deploy new model to Ollama
+2. Create new Qdrant collection (different dimension)
+3. Reindex all documents with new embeddings
+4. Swap collections atomically
+5. Delete old collection
+
+**Pros**:
+- ✅ Immediate quality improvement (15-20%)
+- ✅ Simple change (config + reindex)
+- ✅ No code complexity
+- ✅ Future-proof (state-of-the-art model)
+
+**Cons**:
+- ❌ Requires full reindex (2-4 hours for 1000 documents)
+- ❌ Larger model = slower embedding (~50ms vs. 30ms per chunk)
+- ❌ Higher dimensionality = more storage (~30% increase)
+
+**Expected Impact**: 15-25% recall improvement
+
+#### Option E2: Multi-Vector Embeddings (ColBERT-style)
+
+**Description**: Generate multiple embeddings per chunk (token-level)
+
+**Architecture**:
+```
+Chunk → Transformer → Token embeddings (e.g., 50 tokens × 128 dim) → Store all
+Query → Transformer → Token embeddings → MaxSim(query_tokens, doc_tokens)
+```
+
+**MaxSim scoring**:
+```python
+def maxsim_score(query_embeddings, doc_embeddings):
+    # For each query token, find max similarity with any doc token
+    scores = []
+    for q_emb in query_embeddings:
+        max_sim = max(cosine_similarity(q_emb, d_emb) for d_emb in doc_embeddings)
+        scores.append(max_sim)
+    return sum(scores)
+```
+
+**Pros**:
+- ✅ Best retrieval quality (state-of-the-art results)
+- ✅ Fine-grained matching (token-level)
+- ✅ Handles partial matches better
+
+**Cons**:
+- ❌ **50-100x storage increase** (50 vectors per chunk vs. 1)
+- ❌ **Slower search** (compute MaxSim for each candidate)
+- ❌ **Complex implementation** (custom scoring, storage schema)
+- ❌ **Requires specialized model** (ColBERTv2, not available in Ollama)
+
+**Expected Impact**: 40-50% improvement, but at very high cost
+
+**Verdict**: ❌ Too complex, too expensive for marginal gain over E1+C1
+
+#### Option E3: Fine-Tuned Domain-Specific Model
+
+**Description**: Fine-tune embedding model on Nextcloud corpus
+
+**Process**:
+1. Collect training data (query-document pairs)
+2. Fine-tune base model (e.g., `nomic-embed-text`) on domain data
+3. Deploy fine-tuned model via Ollama
+4. Reindex with fine-tuned embeddings
+
+**Training data needed**:
+- 1,000+ query-document pairs
+- Labeled relevance (positive/negative examples)
+- Representative of real usage
+
+**Pros**:
+- ✅ Optimized for specific content (notes, calendar, deck)
+- ✅ Better handling of domain terminology
+- ✅ Highest potential quality improvement (30-40%)
+
+**Cons**:
+- ❌ **Requires training data** (expensive to collect)
+- ❌ **GPU infrastructure** needed for fine-tuning
+- ❌ **Expertise required** (ML/NLP knowledge)
+- ❌ **Maintenance burden** (retrain as corpus evolves)
+- ❌ **Time investment**: 2-4 weeks initial setup
+
+**Expected Impact**: 30-40% improvement, but high cost
+
+**Verdict**: ⚠️ Consider only if E1+C1 insufficient AND have training data
+
+#### Option E4: Ensemble Embeddings
+
+**Description**: Generate embeddings with multiple models, combine scores
+
+**Implementation**:
+```python
+models = ["mxbai-embed-large-v1", "bge-large-en-v1.5"]
+
+# Index
+embeddings = [await embed(chunk, model) for model in models]
+store_multi_vector(embeddings)
+
+# Search
+query_embeddings = [await embed(query, model) for model in models]
+scores = [search(q_emb, model) for q_emb, model in zip(query_embeddings, models)]
+combined_score = 0.5 * scores[0] + 0.5 * scores[1]
+```
+
+**Pros**:
+- ✅ Robust to individual model weaknesses
+- ✅ Better coverage of semantic space
+
+**Cons**:
+- ❌ 2x storage and compute
+- ❌ Complex scoring and fusion
+- ❌ Marginal improvement (~5-10%) over single best model
+
+**Expected Impact**: 5-10% over best single model
+
+**Verdict**: ❌ Not worth complexity
+
+### Combined Strategies
+
+#### Option D1: Best Chunking + Best Embedding (RECOMMENDED)
+
+**Combination**: Option C1 (Semantic Chunking) + Option E1 (mxbai-embed-large-v1)
+
+**Expected Impact**:
+- Chunking: +20-30% recall
+- Embedding: +15-25% recall
+- **Combined**: +35-55% recall improvement (not strictly additive, but significant)
+
+**Cost**:
+- Development: 1-2 weeks (40-60 hours, including validation)
+- Reindex: 2-4 hours (one-time)
+- Ongoing: None (same infrastructure)
+
+**Pros**:
+- ✅ Addresses both root causes
+- ✅ Orthogonal improvements (chunking + embedding)
+- ✅ Simple implementation
+- ✅ No new infrastructure
+- ✅ Future-proof foundation for additional enhancements (reranking, hybrid search)
+
+**Cons**:
+- ❌ Requires full reindex (manageable)
+- ❌ Slightly higher storage (1024 vs. 768 dim)
+
+**Verdict**: ✅ **RECOMMENDED**
+
+## Decision
+
+**Adopt Option D1: Semantic Chunking + Upgraded Embedding Model**
+
+Implement both improvements together to maximize recall improvement:
+
+### 1. Semantic Sentence-Aware Chunking
+
+**Changes**:
+- Replace naive word splitting with `RecursiveCharacterTextSplitter`
+- Preserve sentence boundaries, paragraph structure
+- Maintain similar chunk sizes (~512 words / 2048 characters)
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/vector/document_chunker.py
+
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+class DocumentChunker:
+    """Chunk documents into semantically coherent pieces."""
+
+    def __init__(
+        self,
+        chunk_size: int = 2048,  # Characters, not words
+        chunk_overlap: int = 200,  # Characters, not words
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+
+        self.splitter = RecursiveCharacterTextSplitter(
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            separators=[
+                "\n\n",  # Paragraphs (highest priority)
+                "\n",    # Lines
+                ". ",    # Sentences
+                "! ",
+                "? ",
+                "; ",    # Clauses
+                ": ",
+                ", ",    # Phrases
+                " ",     # Words (last resort)
+            ],
+            length_function=len,
+            is_separator_regex=False,
+        )
+
+    def chunk_text(self, content: str) -> list[str]:
+        """
+        Chunk text while preserving semantic boundaries.
+
+        Args:
+            content: Full document text
+
+        Returns:
+            List of text chunks, each ending at a semantic boundary
+        """
+        if not content:
+            return []
+
+        # Use RecursiveCharacterTextSplitter for semantic boundaries
+        chunks = self.splitter.split_text(content)
+
+        return chunks
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old (word-based)
+DOCUMENT_CHUNK_SIZE: int = 512  # words
+DOCUMENT_CHUNK_OVERLAP: int = 50  # words
+
+# New (character-based, more precise)
+DOCUMENT_CHUNK_SIZE: int = 2048  # characters (~512 words)
+DOCUMENT_CHUNK_OVERLAP: int = 200  # characters (~50 words)
+```
+
+**Dependency** (`pyproject.toml`):
+```toml
+[project]
+dependencies = [
+    # ... existing dependencies
+    "langchain-text-splitters>=0.2.0",
+]
+```
+
+### 2. Upgrade Embedding Model
+
+**Changes**:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Dynamic dimension detection (query Ollama instead of hardcoding)
+- Create new Qdrant collection for new dimensions
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/embedding/ollama_provider.py
+
+class OllamaEmbeddingProvider(EmbeddingProvider):
+    def __init__(self, base_url: str, model: str, verify_ssl: bool = True):
+        self.base_url = base_url
+        self.model = model
+        self._dimension: int | None = None  # Changed: query dynamically
+        self.client = httpx.AsyncClient(base_url=base_url, verify=verify_ssl)
+
+    async def dimension(self) -> int:
+        """Get embedding dimension from Ollama API."""
+        if self._dimension is None:
+            try:
+                response = await self.client.post(
+                    "/api/show",
+                    json={"name": self.model},
+                    timeout=10.0,
+                )
+                response.raise_for_status()
+                info = response.json()
+                self._dimension = info.get("details", {}).get("embedding_length")
+
+                if self._dimension is None:
+                    # Fallback: generate test embedding to detect dimension
+                    test_emb = await self.embed("test")
+                    self._dimension = len(test_emb)
+
+            except Exception as e:
+                logger.warning(f"Failed to get dimension from Ollama: {e}, using fallback")
+                # Fallback dimensions by model name
+                if "mxbai-embed-large" in self.model:
+                    self._dimension = 1024
+                elif "nomic-embed-text" in self.model:
+                    self._dimension = 768
+                else:
+                    self._dimension = 768  # Default
+
+        return self._dimension
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old
+OLLAMA_EMBEDDING_MODEL: str = "nomic-embed-text"
+
+# New
+OLLAMA_EMBEDDING_MODEL: str = "mxbai-embed-large-v1"
+```
+
+**Environment Variable**:
+```bash
+OLLAMA_EMBEDDING_MODEL=mxbai-embed-large-v1
+```
+
+### 3. Migration Strategy
+
+**Reindexing Process**:
+
+```python
+# nextcloud_mcp_server/vector/migration.py
+
+async def migrate_to_new_embeddings():
+    """
+    Migrate from old embeddings to new embeddings.
+
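+    Process:
+    1. Create new collection with new dimension
+    2. Reindex all documents with new embeddings
+    3. Wait for the reindex to complete
+    4. Atomic swap (point the collection alias at the new collection)
+    5. Verify with benchmark queries (roll back if recall regresses)
+    6. Delete old collection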
+    """
+    old_collection = "nextcloud_content"
+    new_collection = "nextcloud_content_v2"
+
+    # 1. Create new collection
+    await qdrant_client.create_collection(
+        collection_name=new_collection,
+        vectors_config=VectorParams(
+            size=1024,  # mxbai-embed-large-v1 dimension
+            distance=Distance.COSINE,
+        ),
+    )
+
+    # 2. Reindex all documents
+    logger.info("Starting reindex with new embeddings...")
+    scanner = VectorScanner(...)
+    processor = VectorProcessor(collection_name=new_collection, ...)
+
+    await scanner.scan_all()  # Rescans and re-embeds all documents
+
+    # 3. Wait for completion
+    while True:
+        status = await get_sync_status()
+        if status.pending_documents == 0:
+            break
+        await asyncio.sleep(5)
+
+    # 4. Atomic swap
+    # Update config to point to new collection
+    # (or use collection alias in Qdrant)
+    await qdrant_client.update_collection_aliases(
+        change_aliases_operations=[
+            CreateAliasOperation(
+                create_alias=CreateAlias(
+                    collection_name=new_collection,
+                    alias_name="nextcloud_content"
+                )
+            )
+        ]
+    )
+
+    # 5. Verify new collection works
+    test_results = await run_benchmark_queries()
+    if test_results.recall < baseline_recall:
+        # Rollback
+        logger.error("New embeddings worse than baseline, rolling back")
+        await rollback_migration()
+        return False
+
+    # 6. Delete old collection
+    await qdrant_client.delete_collection(old_collection)
+    logger.info("Migration complete!")
+    return True
+```
+
+**Downtime Mitigation**:
+- Use Qdrant collection aliases for atomic swap
+- Reindex can happen in background
+- Only brief downtime during alias swap (~1s)
+
+**Rollback Plan**:
+- Keep old collection until validation complete
+- If new embeddings worse, swap alias back to old collection
+- No data loss
+
+### 4. Validation & Benchmarking
+
+**Before/After Comparison**:
+
+```python
+# tests/benchmarks/chunking_embedding_comparison.py
+
+async def benchmark_chunking_embeddings():
+    """
+    Compare old vs. new chunking and embeddings on test queries.
+    """
+    test_queries = load_benchmark_queries()  # 100 queries with known relevant docs
+
+    # Baseline (current)
+    baseline_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content",  # Old: nomic-embed-text, word chunks
+    )
+
+    # New implementation
+    new_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content_v2",  # New: mxbai-embed-large-v1, semantic chunks
+    )
+
+    # Compare metrics
+    comparison = {
+        "baseline": {
+            "recall@10": calculate_recall(baseline_results, k=10),
+            "precision@10": calculate_precision(baseline_results, k=10),
+            "mrr": calculate_mrr(baseline_results),
+            "zero_result_rate": calculate_zero_result_rate(baseline_results),
+        },
+        "new": {
+            "recall@10": calculate_recall(new_results, k=10),
+            "precision@10": calculate_precision(new_results, k=10),
+            "mrr": calculate_mrr(new_results),
+            "zero_result_rate": calculate_zero_result_rate(new_results),
+        },
+        "improvement": {
+            "recall_improvement": (new_recall - baseline_recall) / baseline_recall,
+            "precision_improvement": (new_precision - baseline_precision) / baseline_precision,
+        }
+    }
+
+    return comparison
+```
+
+**Success Criteria**:
+- **Recall@10**: Improve from ~52% to ≥75% (≈ +44% relative improvement, meeting the ≥40% target)
+- **Precision@10**: Maintain ≥75% (no degradation)
+- **MRR**: Improve from 0.58 to ≥0.70
+- **Zero-result rate**: Reduce from 18% to ≤10%
+- **Indexing time**: Maintain ≤10s per document
+
+**Validation Process**:
+1. Run benchmark on baseline (current implementation)
+2. Implement changes
+3. Run benchmark on new implementation
+4. Compare metrics
+5. If improvement ≥40%, proceed to production
+6. If improvement <40%, investigate and iterate
+
+## Implementation Timeline
+
+### Week 1: Development & Testing
+
+**Day 1-2: Chunking Implementation**
+- [ ] Add langchain-text-splitters dependency
+- [ ] Refactor `document_chunker.py`
+- [ ] Update configuration (character-based chunk sizes)
+- [ ] Write unit tests for semantic boundaries
+- [ ] Validate: Chunks never break mid-sentence
+
+**Day 3-4: Embedding Implementation**
+- [ ] Update `ollama_provider.py` with dynamic dimension detection
+- [ ] Update configuration (new model name)
+- [ ] Deploy `mxbai-embed-large-v1` to Ollama
+- [ ] Test embedding generation with new model
+- [ ] Validate: Embeddings are 1024-dim
+
+**Day 5: Migration Script**
+- [ ] Write migration script (collection creation, reindexing, alias swap)
+- [ ] Test migration on staging environment
+- [ ] Validate: No data loss, atomic swap works
+
+### Week 2: Reindexing & Validation
+
+**Day 1-2: Staging Reindex**
+- [ ] Run full reindex on staging environment
+- [ ] Monitor indexing performance
+- [ ] Validate: All documents indexed correctly
+
+**Day 3: Benchmarking**
+- [ ] Run benchmark queries on old collection (baseline)
+- [ ] Run benchmark queries on new collection
+- [ ] Compare metrics (recall, precision, MRR)
+- [ ] Validate: ≥40% recall improvement
+
+**Day 4: Production Reindex**
+- [ ] Schedule maintenance window (optional, can run in background)
+- [ ] Run migration script on production
+- [ ] Monitor reindexing progress
+- [ ] Atomic swap when complete
+
+**Day 5: Production Validation**
+- [ ] Monitor search quality metrics
+- [ ] Collect user feedback
+- [ ] Compare production metrics to staging
+- [ ] Rollback if issues detected
+
+## Cost Analysis
+
+### Development Cost
+- **Time**: 1-2 weeks (implementation + validation)
+- **Effort**: 40-60 hours @ $100/hour = $4,000 - $6,000
+
+### Infrastructure Cost
+- **Storage**: +30% (1024-dim vs. 768-dim)
+  - Example: 1,000 notes × 3 chunks × 1024 dim × 4 bytes = 12 MB (negligible)
+- **Compute**: +67% embedding time per chunk (50ms vs. 30ms)
+  - Amortized over batch indexing, minimal impact
+- **No new infrastructure**: Uses existing Ollama + Qdrant
+
+### Reindexing Cost (One-Time)
+- **Time**: 2-4 hours for 1,000 documents
+  - 1,000 docs × 3 chunks × 50ms = 150 seconds (~2.5 minutes embedding)
+  - + Ollama processing time + Qdrant insertion
+- **Downtime**: ~1 second (atomic alias swap)
+
+### Total Cost
+- **Initial**: $4,000 - $6,000 (development + testing)
+- **Ongoing**: $0 (no new infrastructure or API costs)
+
+### ROI
+- **Recall improvement**: +40-60% (finding relevant documents)
+- **User satisfaction**: Reduced zero-result queries (18% → 10%)
+- **Foundation**: Enables future enhancements (reranking, hybrid search)
+- **Cost per % improvement**: $100 - $150 (excellent ROI)
+
+## Consequences
+
+### Positive
+
+1. **Addresses Root Causes**: Fixes fundamental issues (chunking, embeddings) not symptoms
+2. **High Impact**: Expected 40-60% recall improvement from foundational changes
+3. **Future-Proof**: Creates solid foundation for future enhancements (reranking, hybrid search, GraphRAG)
+4. **Simple**: No architectural changes, no new infrastructure
+5. **Orthogonal**: Improvements are independent, can be validated separately
+6. **Low Risk**: Proven techniques (RecursiveCharacterTextSplitter, mxbai-embed-large-v1)
+7. **Maintainable**: Standard libraries and models, easy to debug
+
+### Negative
+
+1. **Reindexing Required**: 2-4 hours one-time cost (manageable, can run in background)
+2. **Storage Increase**: +30% for higher-dimensional embeddings (12 MB vs. 9 MB for 1K docs)
+3. **Slower Indexing**: +67% embedding time per chunk (50ms vs. 30ms)
+4. **Dependency**: Adds langchain-text-splitters (minimal, well-maintained library)
+5. **Not a Complete Solution**: May still need reranking/hybrid search for optimal recall (but solid foundation)
+
+### Neutral
+
+1. **Model Lock-In**: Committed to mxbai-embed-large-v1, but can change later (another reindex)
+2. **Chunk Size Trade-offs**: ~512 words is heuristic, may need tuning for specific content types
+
+## Monitoring & Success Metrics
+
+### Real-Time Metrics (Grafana)
+
+**Search Quality**:
+- `semantic_search_recall_at_10` (target: ≥75%)
+- `semantic_search_precision_at_10` (target: ≥75%)
+- `semantic_search_mrr` (target: ≥0.70)
+- `semantic_search_zero_result_rate` (target: ≤10%)
+
+**Performance**:
+- `semantic_search_latency_ms` (p50, p95, p99)
+- `embedding_generation_time_ms`
+- `indexing_throughput_docs_per_sec`
+
+**Indexing**:
+- `documents_indexed_total`
+- `documents_pending`
+- `indexing_errors_total`
+
+### Weekly Validation
+
+**A/B Testing** (if gradual rollout):
+- 50% users: New embeddings
+- 50% users: Old embeddings
+- Compare metrics for 1 week
+- Full rollout if new embeddings superior
+
+**User Feedback**:
+- Survey: "How satisfied are you with search results?" (1-5 scale)
+- Track: Number of "search not working" support tickets
+- Monitor: User-reported false negatives ("I know this doc exists")
+
+### Rollback Criteria
+
+**Automatic Rollback** if:
+- Recall decreases by >10% from baseline
+- Error rate increases by >50%
+- Query latency increases by >100%
+
+**Manual Rollback** if:
+- User complaints increase significantly
+- Zero-result queries increase instead of decrease
+
+## Future Enhancements
+
+These improvements create a solid foundation. Future enhancements (in order of priority):
+
+1. **Cross-Encoder Reranking** (future ADR; number TBD — ADR-012 is assigned to unified multi-algorithm search)
+   - Two-stage retrieval: broad recall (50 candidates) → precise reranking (top 10)
+   - Expected: +15-20% additional recall improvement
+   - Builds on: Better embeddings retrieve better candidates to rerank
+
+2. **Hybrid Search** (ADR-013)
+   - Combine vector search + BM25 keyword search
+   - Expected: +10-15% additional recall (especially for exact matches)
+   - Builds on: Semantic chunks provide better keyword match context
+
+3. **Multi-App Indexing** (ADR-014)
+   - Index calendar, deck, files (currently notes-only)
+   - Expected: Expands searchable corpus 3-5x
+   - Builds on: Proven chunking and embedding strategy
+
+4. **GraphRAG** (ADR-015, conditional)
+   - Only if: Global thematic queries needed OR corpus >10K documents
+   - Expected: Relationship discovery, multi-hop reasoning
+   - Builds on: High-quality embeddings improve graph construction
+
+## References
+
+### Research Papers
+
+1. **RecursiveCharacterTextSplitter**
+   - LangChain Documentation: https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter
+   - Proven technique used by major RAG systems
+
+2. **MTEB Leaderboard** (Massive Text Embedding Benchmark)
+   - https://huggingface.co/spaces/mteb/leaderboard
+   - Comprehensive embedding model comparison
+
+3. **mxbai-embed-large**
+   - Model: https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
+   - Best general-purpose embedding model (MTEB: 64.68)
+
+### Related ADRs
+
+- **ADR-003**: Vector Database and Semantic Search Architecture (original implementation)
+- **ADR-008**: MCP Sampling for Multi-App Semantic Search with RAG (answer generation)
+
+### Tools & Libraries
+
+- **LangChain Text Splitters**: https://python.langchain.com/docs/modules/data_connection/document_transformers/
+- **Ollama Embedding Models**: https://ollama.ai/library
+- **Qdrant Collections**: https://qdrant.tech/documentation/concepts/collections/
+
+## Summary
+
+This ADR addresses the root causes of poor semantic search recall:
+
+1. **Better Chunking**: Semantic sentence-aware splitting (preserves context)
+2. **Better Embeddings**: Upgrade to mxbai-embed-large-v1 (richer semantic space)
+
+**Expected Impact**: 40-60% recall improvement with minimal cost and complexity.
+
+**Why This Approach**:
+- Fixes fundamentals before adding complexity
+- Proven techniques (not experimental)
+- Simple implementation (1-2 weeks)
+- Creates foundation for future enhancements
+- No new infrastructure or ongoing costs
+
+**Next Steps**: Approve ADR → Implement changes → Reindex → Validate → Production rollout
+
+## Implementation Status
+
+### Completed (2025-11-18)
+
+**✅ Semantic Markdown-Aware Chunking (Option C1 + C3 Hybrid)**
+
+Implementation details:
+- Replaced custom word-based chunking with `MarkdownTextSplitter` from LangChain
+- Optimized for Nextcloud Notes markdown content with special handling for:
+  - Headers (`#`, `##`, `###`, etc.)
+  - Code blocks (` ``` `)
+  - Lists (`-`, `*`, `1.`)
+  - Horizontal rules (`---`)
+  - Paragraphs and sentences
+- Maintained `ChunkWithPosition` interface for backward compatibility
+- Updated configuration defaults:
+  - `DOCUMENT_CHUNK_SIZE`: 512 words → 2048 characters
+  - `DOCUMENT_CHUNK_OVERLAP`: 50 words → 200 characters
+- Updated unit tests to verify position tracking and boundary preservation
+- All tests passing with markdown-aware character-based chunking
+
+**Files Modified**:
+- `nextcloud_mcp_server/vector/document_chunker.py` - LangChain integration
+- `nextcloud_mcp_server/config.py` - Character-based defaults
+- `tests/unit/test_document_chunker.py` - Updated test suite
+
+**Dependencies Added**:
+- `langchain-text-splitters>=1.0.0` (already present in `pyproject.toml`)
+
+**Migration Required**:
+- ⚠️ Full reindex required to apply new chunking strategy
+- Existing documents in vector database use old word-based chunks
+- See "Migration Strategy" section above for reindexing process
+
+### Pending
+
+**⏳ Embedding Model Upgrade (Option E1)**
+
+Still to be implemented:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Implement dynamic dimension detection in `ollama_provider.py`
+- Create migration script for collection reindexing
+- Run benchmarking to validate improvement
+- Deploy to production with atomic collection swap
+
+**Estimated Timeline**: 1-2 weeks for implementation and validation
@@ -0,0 +1,619 @@
+# ADR-012: Unified Multi-Algorithm Search with Client-Configurable Weighting
+
+## Status
+Proposed
+
+## Context
+
+### Current State
+
+The Nextcloud MCP server currently provides semantic search via vector similarity (Qdrant), as designed in ADR-003 and implemented through ADR-007. However, users and MCP clients have limited control over search behavior:
+
+1. **Single algorithm only**: Only pure vector similarity search is available
+2. **No algorithm selection**: MCP clients cannot choose between semantic, keyword, or fuzzy approaches
+3. **No weighting control**: Clients cannot adjust the balance between different search methods
+4. **Disconnected implementations**: Viz pane uses different search algorithms than MCP tools
+5. **Limited flexibility**: No way to optimize search for different use cases (exact match vs. conceptual similarity)
+
+### User Needs
+
+Different search scenarios require different algorithms:
+
+- **Exact match queries**: "Find note titled 'Q1 Budget'" → keyword search preferred
+- **Conceptual queries**: "What are my goals for next quarter?" → semantic search preferred
+- **Typo-tolerant queries**: "Find note about kuberntes" → fuzzy search needed
+- **Balanced queries**: "Find documentation about API endpoints" → hybrid search optimal
+
+Additionally, users need a **testing interface** (viz pane) to:
+- Experiment with different search algorithms on their own documents
+- Visualize search results and algorithm behavior
+- Tune weights for optimal results
+- Understand which algorithm works best for their queries
+
+### Technical Requirements
+
+1. **Unified interface**: Single MCP tool supporting multiple algorithms
+2. **Client control**: MCP clients specify algorithm and weights via tool parameters
+3. **Backward compatibility**: Existing `nc_semantic_search()` behavior preserved
+4. **Shared implementation**: Viz pane and MCP tools use identical search algorithms
+5. **User accessibility**: Viz pane available to all logged-in users with vector sync enabled
+6. **Performance**: Minimal overhead for algorithm selection
+
+## Decision
+
+We will implement a **unified multi-algorithm search architecture** with the following components:
+
+### Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                         MCP Client / User Browser                            │
+│                                                                               │
+│  ┌──────────────────────────┐         ┌──────────────────────────────────┐  │
+│  │   MCP Tool Call          │         │   Viz Pane (Browser UI)          │  │
+│  │                          │         │                                  │  │
+│  │ nc_semantic_search(      │         │ - Algorithm selector dropdown    │  │
+│  │   query="kubernetes",    │         │ - Weight adjustment sliders      │  │
+│  │   algorithm="hybrid",    │         │ - Interactive 2D scatter plot    │  │
+│  │   semantic_weight=0.5,   │         │ - Side-by-side comparison        │  │
+│  │   keyword_weight=0.3,    │         │ - Real-time search testing       │  │
+│  │   fuzzy_weight=0.2       │         │                                  │  │
+│  │ )                        │         │                                  │  │
+│  └───────────┬──────────────┘         └────────────┬─────────────────────┘  │
+└──────────────┼─────────────────────────────────────┼────────────────────────┘
+               │                                      │
+               │ MCP Protocol                         │ HTTPS (htmx)
+               │                                      │
+┌──────────────▼──────────────────────────────────────▼────────────────────────┐
+│                        MCP Server (/app endpoint)                             │
+│                                                                               │
+│  ┌─────────────────────────────────────────────────────────────────────────┐ │
+│  │              Unified Search Interface (server/semantic.py)              │ │
+│  │                                                                         │ │
+│  │  @mcp.tool() nc_semantic_search(algorithm, weights...)                 │ │
+│  │  ├─ Validate parameters (weights sum ≤1.0)                             │ │
+│  │  ├─ Dispatch to algorithm selector                                     │ │
+│  │  └─ Return ranked SearchResponse                                       │ │
+│  └────────────────────────────┬────────────────────────────────────────────┘ │
+│                                │                                              │
+│  ┌────────────────────────────▼────────────────────────────────────────────┐ │
+│  │              Algorithm Dispatcher (search/algorithms.py)                │ │
+│  │                                                                         │ │
+│  │  if algorithm == "semantic":    → semantic.py                          │ │
+│  │  if algorithm == "keyword":     → keyword.py                           │ │
+│  │  if algorithm == "fuzzy":       → fuzzy.py                             │ │
+│  │  if algorithm == "hybrid":      → hybrid.py (RRF fusion)               │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+│                                                                               │
+│  ┌──────────────────┐  ┌──────────────────┐  ┌──────────────────┐           │
+│  │  semantic.py     │  │  keyword.py      │  │  fuzzy.py        │           │
+│  │                  │  │                  │  │                  │           │
+│  │ • Query Qdrant   │  │ • Token matching │  │ • Char overlap   │           │
+│  │ • Cosine dist    │  │ • Title weight   │  │ • 70% threshold  │           │
+│  │ • Score ≥0.7     │  │ • ADR-001 logic  │  │ • Simple impl    │           │
+│  └────────┬─────────┘  └────────┬─────────┘  └────────┬─────────┘           │
+│           │                     │                      │                     │
+│           └─────────────────────┼──────────────────────┘                     │
+│                                 │                                            │
+│  ┌──────────────────────────────▼──────────────────────────────────────────┐ │
+│  │                    hybrid.py (Reciprocal Rank Fusion)                   │ │
+│  │                                                                         │ │
+│  │  1. Run algorithms in parallel (semantic, keyword, fuzzy)              │ │
+│  │  2. Collect ranked results from each                                   │ │
+│  │  3. Apply RRF formula: score = weight / (k + rank)                     │ │
+│  │  4. Combine scores across algorithms                                   │ │
+│  │  5. Re-rank by combined score                                          │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+└───────────────────────────────────┬───────────────────────────────────────────┘
+                                    │
+                    ┌───────────────┴───────────────┐
+                    │                               │
+         ┌──────────▼──────────┐         ┌─────────▼────────────┐
+         │ Qdrant Vector DB    │         │ Nextcloud APIs       │
+         │                     │         │                      │
+         │ • Vector search     │         │ • Access verification│
+         │ • user_id filter    │         │ • Full metadata fetch│
+         │ • Score threshold   │         │ • Permission checks  │
+         │ • 768-dim embeddings│         │                      │
+         └─────────────────────┘         └──────────────────────┘
+```
+
+### Data Flow
+
+#### MCP Tool Request
+```
+1. Client calls nc_semantic_search(query, algorithm="hybrid", weights...)
+2. Server validates parameters (weights sum ≤1.0)
+3. Dispatcher routes to hybrid.py
+4. Hybrid search runs semantic, keyword, fuzzy in parallel
+5. RRF combines results with weighted scores
+6. Access verification via Nextcloud API
+7. Return ranked SearchResponse to client
+```
+
+#### Viz Pane Request (Server-Side Processing)
+```
+1. User navigates to /app (Vector Visualization tab)
+2. Browser loads vector-viz fragment via htmx
+3. User enters query and adjusts algorithm/weights
+4. htmx sends request to /app/vector-viz endpoint
+5. Server executes search via search/algorithms.py:
+   - Filters by user_id (multi-tenant security)
+   - Applies selected algorithm (semantic/keyword/fuzzy/hybrid)
+   - Filters by document type (notes/files/calendar/contacts)
+   - Retrieves matching results + metadata
+6. Server performs PCA reduction (768-dim → 2D):
+   - Converts matching results to 2D coordinates
+   - Only sends coordinates + metadata (not full vectors)
+   - Dramatically reduces bandwidth (e.g., 768 floats → 2 floats per doc)
+7. Server returns JSON: {results: [...], coordinates_2d: [...], stats: {...}}
+8. Browser receives lightweight response
+9. Plotly.js renders interactive scatter plot
+10. Matching results highlighted (blue), non-matches grayed (40% opacity)
+```
+
+**Performance Benefits of Server-Side Processing**:
+- **Bandwidth reduction**: ~384x less data (2 floats vs 768 floats per document)
+- **Client efficiency**: Browser only handles visualization, not computation
+- **Scalability**: Can visualize 10,000+ documents without client-side lag
+- **Security**: Raw vectors never leave server
+- **Consistency**: Same search logic as MCP tool (no drift)
+
+### 1. Core Search Algorithms
+
+Four search algorithms will be available:
+
+#### a) Semantic Search (Vector Similarity)
+- **Method**: Cosine distance in 768-dimensional embedding space
+- **Implementation**: Qdrant `query_points` with user_id filtering
+- **Use case**: Conceptual queries, finding related content
+- **Current status**: Implemented in `nextcloud_mcp_server/server/semantic.py`
+
+#### b) Keyword Search (Token-Based)
+- **Method**: Token matching with weighted scoring (from ADR-001)
+- **Implementation**: Title matches weighted 3x higher than content
+- **Use case**: Exact phrase matching, known titles
+- **Current status**: Designed in ADR-001, not implemented
+
+#### c) Fuzzy Search (Character Overlap)
+- **Method**: Simple character-based similarity (70% threshold)
+- **Implementation**: Character set comparison (current viz pane approach)
+- **Use case**: Typo tolerance, approximate matching
+- **Current status**: Implemented in viz pane only
+
+#### d) Hybrid Search (Multi-Algorithm Fusion)
+- **Method**: Reciprocal Rank Fusion (RRF) from ADR-003
+- **Implementation**: Parallel execution + score combination
+- **Use case**: Balanced queries, general-purpose search
+- **Current status**: Designed in ADR-003, not implemented
+
+### 2. Unified MCP Tool Interface
+
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(
+    query: str,
+    ctx: Context,
+    limit: int = 10,
+    score_threshold: float = 0.7,
+    algorithm: Literal["semantic", "keyword", "fuzzy", "hybrid"] = "hybrid",
+    semantic_weight: float = 0.5,
+    keyword_weight: float = 0.3,
+    fuzzy_weight: float = 0.2,
+) -> SearchResponse:
+    """
+    Search Nextcloud content using configurable algorithms.
+
+    Args:
+        query: Natural language search query
+        ctx: MCP context for authentication
+        limit: Maximum results to return
+        score_threshold: Minimum similarity score (semantic/hybrid only)
+        algorithm: Search algorithm to use
+        semantic_weight: Weight for semantic results (hybrid only, default: 0.5)
+        keyword_weight: Weight for keyword results (hybrid only, default: 0.3)
+        fuzzy_weight: Weight for fuzzy results (hybrid only, default: 0.2)
+
+    Returns:
+        Ranked search results with scores and excerpts
+    """
+```
+
+**Key decisions**:
+- **Single tool name**: Keep `nc_semantic_search` for backward compatibility
+- **Algorithm parameter**: Explicit selection via enum
+- **Weight parameters**: Client-configurable, only apply to hybrid mode
+- **Validation**: Weights must sum to ≤1.0, enforced server-side
+- **Defaults**: Hybrid mode with balanced weights (semantic 50%, keyword 30%, fuzzy 20%)
+
+### 3. Shared Algorithm Implementation
+
+Extract search algorithms into reusable module:
+
+```
+nextcloud_mcp_server/
+├── search/
+│   ├── __init__.py
+│   ├── algorithms.py          # Core search implementations
+│   ├── semantic.py             # Vector similarity search
+│   ├── keyword.py              # Token-based search (ADR-001)
+│   ├── fuzzy.py                # Character overlap search
+│   └── hybrid.py               # RRF fusion (ADR-003)
+└── server/
+    └── semantic.py             # MCP tool wrapper
+```
+
+**Benefits**:
+- Viz pane and MCP tools share identical implementations
+- Testable in isolation
+- Easy to add new algorithms (e.g., BM25, neural reranking)
+- Clear separation of concerns
+
+### 4. Viz Pane Integration
+
+Update viz pane (`nextcloud_mcp_server/auth/userinfo_routes.py`) to:
+
+1. **Use shared algorithms**: Import from `search/algorithms.py`
+2. **Server-side filtering**: All search and filtering operations happen server-side
+   - Query execution via shared search backend
+   - Document type filtering (notes, files, calendar, contacts)
+   - User ID filtering for multi-tenant security
+   - Only matching results + metadata sent to client
+   - Reduces bandwidth and improves performance
+3. **PCA reduction**: Server performs dimensionality reduction (768-dim → 2D)
+   - Only 2D coordinates sent to browser for visualization
+   - Dramatically reduces data transfer vs sending full vectors
+   - Enables visualization of large document collections
+4. **User accessibility**: Available to all users with vector sync enabled
+5. **Security**: Filter results by `user_id` (only show user's own documents)
+6. **Interactive testing**: Allow users to:
+   - Select algorithm type
+   - Adjust weights (hybrid mode)
+   - Compare results across algorithms
+   - Visualize result distribution in 2D space
+
+#### Viz Pane UI Components
+
+```
+┌────────────────────────────────────────────────────────────────────────┐
+│ Vector Visualization                                          [Status] │
+├────────────────────────────────────────────────────────────────────────┤
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Configuration                                             │  │
+│ │                                                                  │  │
+│ │ Query: [_______________________________________________] [Search]│  │
+│ │                                                                  │  │
+│ │ Algorithm: [Hybrid ▼]  [Semantic] [Keyword] [Fuzzy]             │  │
+│ │                                                                  │  │
+│ │ Weights (Hybrid Mode):                                           │  │
+│ │   Semantic: [========50========] 0.5                             │  │
+│ │   Keyword:  [======30======    ] 0.3                             │  │
+│ │   Fuzzy:    [====20====        ] 0.2                             │  │
+│ │                                                                  │  │
+│ │ Document Types: ☑ Notes  ☑ Files  ☑ Calendar  ☑ Contacts        │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Vector Space Visualization (PCA 2D Projection)                   │  │
+│ │                                                                  │  │
+│ │        ▲                                                         │  │
+│ │    PC2 │     ●  ● ●      🔵 Matching results (full opacity)     │  │
+│ │        │  ●     ●  ●     ⚪ Non-matching results (40% opacity)   │  │
+│ │        │    🔵  ● ●                                              │  │
+│ │        │  ●  🔵  ●       Hover: Show document title + excerpt    │  │
+│ │        │  ● ●  🔵 ●      Click: Open document in Nextcloud       │  │
+│ │    ────┼──●─🔵──●─●────► PC1                                     │  │
+│ │        │   ● ●  ●                                                │  │
+│ │        │    🔵 ●   ●     Explained Variance:                     │  │
+│ │        │  ●    ●  ●      PC1: 23.4% | PC2: 18.7%                 │  │
+│ │        │     ● ●                                                 │  │
+│ │                                                                  │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Results (12 matching documents)                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Kubernetes Setup Guide                        Score: 0.87     │  │
+│ │    "...configure kubectl to connect to cluster..."              │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Container Orchestration Notes                 Score: 0.82     │  │
+│ │    "...deployment strategies for kubernetes..."                 │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 K8s Troubleshooting                           Score: 0.79     │  │
+│ │    "...common kuberntes errors and solutions..."                │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ [Show More Results...]                                           │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Algorithm Performance Comparison                                 │  │
+│ │                                                                  │  │
+│ │ Algorithm    │ Results │ Avg Score │ Time (ms) │ Precision     │  │
+│ │ ─────────────┼─────────┼───────────┼───────────┼───────────     │  │
+│ │ Semantic     │   45    │   0.78    │   145ms   │  ████░ 0.82   │  │
+│ │ Keyword      │   23    │   0.91    │    42ms   │  ███░░ 0.67   │  │
+│ │ Fuzzy        │   67    │   0.72    │    89ms   │  ██░░░ 0.45   │  │
+│ │ Hybrid (RRF) │   52    │   0.84    │   198ms   │  █████ 0.89   │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+```
+
+**Key UI Features**:
+
+1. **Search Input**: Real-time query testing with instant visualization
+2. **Algorithm Selector**: Dropdown + quick-select buttons
+3. **Weight Sliders**: Visual adjustment with live preview (hybrid mode only)
+4. **Document Type Filters**: Checkboxes for notes, files, calendar, contacts
+5. **2D Scatter Plot**: Interactive Plotly.js visualization
+   - Blue dots = matching documents (full opacity)
+   - Gray dots = non-matching documents (40% opacity)
+   - Hover = show title + excerpt tooltip
+   - Click = open document in Nextcloud
+   - Zoom/pan controls for exploration
+6. **Results Panel**: Ranked list with scores and excerpts
+7. **Performance Table**: Compare algorithm speed and accuracy
+8. **Explained Variance**: Show how much information PCA preserves
+
+**Technology Stack**:
+- **Frontend**: htmx for dynamic loading, Alpine.js for reactivity
+- **Visualization**: Plotly.js for interactive scatter plots
+- **Styling**: Tailwind CSS (consistent with existing /app UI)
+- **Backend**: Shared `search/algorithms.py` implementation
+
+### 5. Reciprocal Rank Fusion (RRF) for Hybrid Search
+
+Following ADR-003's design:
+
+```python
+def reciprocal_rank_fusion(
+    results: dict[str, list[SearchResult]],
+    weights: dict[str, float],
+    k: int = 60
+) -> list[SearchResult]:
+    """
+    Combine multiple ranked result lists using RRF.
+
+    Args:
+        results: Dict of algorithm_name -> ranked results
+        weights: Dict of algorithm_name -> weight (0-1)
+        k: RRF constant (default: 60, standard value)
+
+    Returns:
+        Combined and re-ranked results
+    """
+    scores = defaultdict(float)
+
+    for algo_name, algo_results in results.items():
+        weight = weights.get(algo_name, 0.0)
+        for rank, result in enumerate(algo_results, start=1):
+            # Weighted RRF formula: weight / (k + rank)
+            rrf_score = weight / (k + rank)
+            scores[result.doc_id] += rrf_score
+
+    # Sort (doc_id, combined score) pairs by score, highest first
+    return sorted(scores.items(), key=lambda x: x[1], reverse=True)
+```
+
+**RRF properties**:
+- **Rank-based**: Uses position, not raw scores (handles score scale differences)
+- **Proven effective**: Standard approach in information retrieval
+- **Configurable**: `k` parameter controls rank decay (default: 60)
+- **Weight support**: Allows algorithm-specific importance
+
+## Implementation Plan
+
+### Phase 1: Extract and Unify Algorithms (Week 1)
+
+1. Create `nextcloud_mcp_server/search/` module
+2. Implement `algorithms.py` with base interface
+3. Extract semantic search logic from `server/semantic.py`
+4. Implement keyword search from ADR-001 design
+5. Extract fuzzy search from viz pane
+6. Implement RRF hybrid search from ADR-003
+7. Add comprehensive unit tests for each algorithm
+
+### Phase 2: Update MCP Tool (Week 1-2)
+
+1. Add `algorithm` parameter to `nc_semantic_search()`
+2. Add weight parameters (`semantic_weight`, etc.)
+3. Implement algorithm dispatcher
+4. Add parameter validation (weights sum ≤1.0)
+5. Update response model to include algorithm metadata
+6. Maintain backward compatibility (default: hybrid)
+7. Add integration tests for all algorithm modes
+
+### Phase 3: Update Viz Pane (Week 2)
+
+**Critical: All processing must happen server-side**
+
+1. **Remove client-side search filtering**
+   - Delete JavaScript-based keyword/fuzzy matching
+   - Remove client-side document type filtering
+   - No search logic in browser
+2. **Implement server-side endpoint** (`/app/vector-viz`)
+   - Accept query, algorithm, weights, doc_type filters
+   - Execute search via `search/algorithms.py`
+   - Filter results by user_id (security)
+   - Perform PCA reduction (768-dim → 2D)
+   - Return JSON with 2D coordinates + metadata only
+3. **Update frontend**
+   - htmx form submission to `/app/vector-viz`
+   - Algorithm selector dropdown
+   - Weight adjustment sliders (htmx updates on change)
+   - Document type checkboxes
+   - Plotly.js visualization of server response
+4. **Performance optimization**
+   - Limit results to user's documents only
+   - Cache PCA transformation (invalidate on new vectors)
+   - Stream large result sets if needed
+   - Add loading indicators for server processing
+
+### Phase 4: Documentation and Testing (Week 2-3)
+
+1. Update MCP tool documentation
+2. Add algorithm selection guide
+3. Document weight tuning recommendations
+4. Add end-to-end tests (MCP + viz pane)
+5. Performance benchmarks for each algorithm
+6. Update CLAUDE.md with search patterns
+
+## Consequences
+
+### Positive
+
+1. **Flexibility**: MCP clients can optimize search for their use case
+2. **Unified implementation**: Single source of truth for search algorithms
+3. **User empowerment**: Viz pane enables query testing and tuning
+4. **Backward compatible**: Existing tool name and call signature preserved; pure semantic behavior remains available via `algorithm="semantic"`
+5. **Extensible**: Easy to add new algorithms (BM25, neural reranking)
+6. **Testable**: Each algorithm can be unit tested independently
+7. **Standards-based**: RRF is proven in production systems
+
+### Negative
+
+1. **Complexity**: More parameters for clients to understand
+2. **API surface**: Larger tool signature (8 parameters)
+3. **Performance**: Hybrid search requires multiple queries
+4. **Validation overhead**: Weight validation adds processing
+5. **Documentation burden**: Need to explain when to use each algorithm
+
+### Neutral
+
+1. **Weight defaults**: May need tuning based on user feedback
+2. **Algorithm performance**: Will vary by content type and query
+3. **Viz pane adoption**: Unknown if users will utilize testing interface
+
+## Alternatives Considered
+
+### Alternative 1: Separate Tools Per Algorithm
+
+```python
+@mcp.tool()
+async def nc_semantic_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure vector similarity search."""
+
+@mcp.tool()
+async def nc_keyword_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure keyword matching."""
+
+@mcp.tool()
+async def nc_hybrid_search(query: str, ctx: Context, weights: dict, ...) -> SearchResponse:
+    """Hybrid search with weights."""
+```
+
+**Rejected because**:
+- API proliferation (3+ tools instead of 1)
+- Harder to discover capabilities
+- Backward compatibility issues
+- DRY violation (repeated parameters)
+
+### Alternative 2: Server-Wide Configuration Only
+
+```python
+# .env configuration
+SEARCH_ALGORITHM=hybrid
+SEMANTIC_WEIGHT=0.5
+KEYWORD_WEIGHT=0.3
+FUZZY_WEIGHT=0.2
+```
+
+**Rejected because**:
+- No per-query flexibility
+- MCP clients cannot optimize for different tasks
+- Requires server restart for changes
+- User's requirement: "expose a way for users to override the default weights"
+
+### Alternative 3: Production-Grade Fuzzy (Levenshtein/RapidFuzz)
+
+**Rejected because**:
+- Adds external dependency
+- Simple character overlap performs adequately
+- Can always upgrade later if needed
+- User's preference: "Keep simple character overlap"
+
+## Related ADRs
+
+- **ADR-001**: Enhanced Note Search (keyword algorithm design)
+- **ADR-003**: Vector Database and Semantic Search (hybrid search + RRF design)
+- **ADR-007**: Background Vector Sync (semantic search implementation)
+- **ADR-008**: MCP Sampling for RAG (uses semantic search results)
+- **ADR-009**: Semantic Search OAuth Scope (security model)
+- **ADR-011**: Improving Semantic Search Quality (mentions future "ADR-013" for hybrid search)
+
+**This ADR supersedes**:
+- ADR-011's placeholder for "ADR-013: Hybrid Search"
+
+**This ADR implements**:
+- ADR-003's hybrid search design (previously unimplemented)
+- ADR-001's keyword search design (previously unimplemented)
+
+## References
+
+- **Reciprocal Rank Fusion**: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). "Reciprocal rank fusion outperforms condorcet and individual rank learning methods." SIGIR '09.
+- **Vector Search**: Malkov, Y. A., & Yashunin, D. A. (2018). "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs." TPAMI.
+- **Hybrid Search Best Practices**: Qdrant documentation on hybrid search patterns
+- **MCP Protocol**: Model Context Protocol specification for tool design
+
+## Implementation Notes
+
+### Weight Validation
+
+```python
+def validate_weights(
+    semantic_weight: float,
+    keyword_weight: float,
+    fuzzy_weight: float
+) -> None:
+    """Validate hybrid search weights."""
+    if semantic_weight < 0 or keyword_weight < 0 or fuzzy_weight < 0:
+        raise ValueError("Weights must be non-negative")
+
+    total = semantic_weight + keyword_weight + fuzzy_weight
+    if total > 1.0:
+        raise ValueError(f"Weights sum to {total:.2f}, must be ≤1.0")
+
+    if total == 0.0:
+        raise ValueError("At least one weight must be > 0")
+```
+
+### Backward Compatibility
+
+The default behavior (`algorithm="hybrid"` with balanced weights) provides better results than current pure semantic search, while maintaining the same tool name and signature structure. Existing clients will automatically benefit from hybrid search without code changes.
+
+### Performance Considerations
+
+- **Semantic search**: ~50-200ms (vector DB query)
+- **Keyword search**: ~10-50ms (in-memory token matching)
+- **Fuzzy search**: ~20-100ms (character comparison)
+- **Hybrid search**: ~100-300ms (parallel execution + fusion)
+
+Parallel execution of algorithms minimizes hybrid search latency.
+
+### Security Model
+
+All algorithms respect the same security boundaries:
+1. **User filtering**: Qdrant queries filter by `user_id`
+2. **Access verification**: Results verified via Nextcloud API
+3. **OAuth scope**: `semantic:read` required for all algorithms
+4. **Viz pane**: Shows only current user's documents
+
+## Success Metrics
+
+1. **Adoption**: % of MCP clients using algorithm parameter
+2. **Performance**: Search latency percentiles (p50, p95, p99)
+3. **Quality**: User satisfaction with result relevance
+4. **Viz pane usage**: % of users accessing testing interface
+5. **Weight distribution**: Most common weight configurations
+
+## Future Enhancements
+
+1. **Additional algorithms**: BM25, TF-IDF, neural reranking
+2. **Auto-tuning**: Learn optimal weights per user
+3. **Query analysis**: Automatic algorithm selection based on query
+4. **Cross-app search**: Extend beyond notes to calendar, files, etc.
+5. **Feedback loop**: Use click-through rate to improve weights
@@ -0,0 +1,254 @@
+## ADR-013: RAG Evaluation Testing Framework
+
+**Status:** Proposed
+
+**Date:** 2025-11-15
+
+### Context
+
+The `nc_semantic_search_answer` tool implements a Retrieval-Augmented Generation (RAG) system where:
+1. **Retrieval**: Vector sync pipeline indexes Nextcloud documents (notes, calendar, contacts, etc.) into a vector database
+2. **Generation**: MCP client's LLM synthesizes answers from retrieved documents via MCP sampling (ADR-008)
+
+We need a testing framework to evaluate RAG system performance and identify whether failures occur in retrieval (wrong documents found) or generation (poor answer quality). This framework must use industry-standard evaluation methodologies while remaining practical to implement and maintain.
+
+To establish a baseline, we will use the **BeIR/nfcorpus** dataset (medical/biomedical corpus) with 3,633 documents and established query/relevance-judgment pairs (qrels). The dataset does not ship reference answers; those are generated separately.
+
+- Homepage: https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/
+- Download: https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nfcorpus.zip
+
+### Decision
+
+We will implement a **two-part evaluation framework** that independently tests retrieval and generation quality using pytest fixtures.
+
+#### In Scope
+
+**1. Retrieval Evaluation**
+Tests the vector sync/embedding pipeline's ability to find relevant documents.
+
+- **Metric: Context Recall** (Did we retrieve documents containing the answer?)
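+  - **Evaluation method**: Heuristic - Check if ground-truth document IDs appear in top-k retrieval results. Note that recall is capped at k / (number of expected documents): with `limit=10` and the 14–21 highly relevant documents per selected query, recall cannot exceed ~0.71, so k or the pass threshold must be chosen accordingly (e.g., normalize by min(k, number of expected documents))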
+  - **Test**: Query → Semantic search → Assert expected doc IDs present
+
+**2. Generation Evaluation**
+Tests the MCP client LLM's ability to synthesize correct answers from retrieved context.
+
+- **Metric: Answer Correctness** (Is the generated answer factually correct?)
+  - **Evaluation method**: LLM-as-judge - Compare RAG answer against ground-truth answer
+  - **Test**: Query → `nc_semantic_search_answer` → LLM evaluates answer vs. ground truth (binary true/false)
+
+#### Out of Scope (Initial Implementation)
+
+- **Context Relevance/Precision**: Measuring irrelevant documents in retrieval results
+- **Faithfulness/Groundedness**: Detecting hallucinations not supported by retrieved context
+- **Answer Relevance**: Whether answer addresses the specific question asked
+- **Out-of-Scope Handling**: Testing "I don't know" responses when answer isn't in context
+- **Continuous benchmarking**: Automated tracking of metric trends over time
+- **Custom domain datasets**: Production-specific test data (medical corpus used initially)
+
+These remain valuable for future iterations but add complexity beyond our initial goals.
+
+#### Implementation
+
+**Test Structure**
+
+Location: `tests/rag_evaluation/`
+- `test_retrieval_quality.py` - Retrieval evaluation tests
+- `test_generation_quality.py` - Generation evaluation tests
+- `conftest.py` - Fixtures for test data, MCP clients, and evaluation LLMs
+
+**Required Pytest Fixtures**
+
+1. **`nfcorpus_test_data`** (session-scoped)
+   - Downloads/caches BeIR nfcorpus dataset at runtime
+   - Loads 5 pre-selected test queries with:
+     - Query text
+     - Pre-generated ground-truth answer (from `tests/rag_evaluation/fixtures/ground_truth.json`)
+     - Expected document IDs (from qrels with score=2)
+   - Uploads all corpus documents as notes in test Nextcloud instance
+   - Triggers vector sync to index documents
+   - Waits for indexing completion
+   - Returns test case data structure
+
+2. **`mcp_sampling_client`** (session-scoped)
+   - Creates MCP client that supports sampling
+   - Configurable LLM provider (ollama or anthropic) via environment:
+     - `RAG_EVAL_PROVIDER=ollama` (default) or `anthropic`
+     - `RAG_EVAL_OLLAMA_BASE_URL=http://localhost:11434`
+     - `RAG_EVAL_OLLAMA_MODEL=llama3.1:8b`
+     - `RAG_EVAL_ANTHROPIC_API_KEY=sk-...`
+     - `RAG_EVAL_ANTHROPIC_MODEL=claude-3-5-sonnet-20241022`
+   - Returns configured MCP client fixture
+
+3. **`evaluation_llm`** (session-scoped)
+   - Separate LLM instance for evaluation (independent from MCP client)
+   - Same provider configuration as `mcp_sampling_client`
+   - Returns callable: `async def evaluate(prompt: str) -> str`
+
+**Test Implementation Examples**
+
+```python
+# tests/rag_evaluation/test_retrieval_quality.py
+async def test_retrieval_recall(nc_client, nfcorpus_test_data):
+    """Test that semantic search retrieves documents containing the answer."""
+    for test_case in nfcorpus_test_data:
+        # Perform semantic search (retrieval only, no generation)
+        results = await nc_client.notes.semantic_search(
+            query=test_case.query,
+            limit=10
+        )
+
+        retrieved_doc_ids = {r.document_id for r in results}
+        expected_doc_ids = set(test_case.expected_document_ids)
+
+        # Context Recall: Are expected documents in top-k results?
+        recall = len(expected_doc_ids & retrieved_doc_ids) / len(expected_doc_ids)
+        assert recall >= 0.8, f"Recall {recall} below threshold for query: {test_case.query}"
+
+
+# tests/rag_evaluation/test_generation_quality.py
+async def test_answer_correctness(mcp_sampling_client, evaluation_llm, nfcorpus_test_data):
+    """Test that RAG system generates factually correct answers."""
+    for test_case in nfcorpus_test_data:
+        # Execute full RAG pipeline (retrieval + generation)
+        result = await mcp_sampling_client.call_tool(
+            "nc_semantic_search_answer",
+            arguments={"query": test_case.query, "limit": 5}
+        )
+
+        rag_answer = result["generated_answer"]
+
+        # LLM-as-judge evaluation
+        evaluation_prompt = f"""Compare these two answers and respond with only TRUE or FALSE.
+
+Question: {test_case.query}
+
+Generated Answer: {rag_answer}
+
+Ground Truth Answer: {test_case.ground_truth}
+
+Are these answers semantically equivalent (do they convey the same factual information)?
+Respond with only: TRUE or FALSE"""
+
+        evaluation_result = await evaluation_llm(evaluation_prompt)
+
+        assert evaluation_result.strip().upper() == "TRUE", \
+            f"Answer mismatch for query: {test_case.query}\nGot: {rag_answer}\nExpected: {test_case.ground_truth}"
+```
+
+**Dataset Integration**
+
+The BeIR nfcorpus dataset structure:
+- **corpus.jsonl**: 3,633 medical/biomedical documents (articles from PubMed)
+- **queries.jsonl**: 3,237 queries (questions)
+- **qrels/*.tsv**: Relevance judgments mapping query IDs to document IDs with scores (2=highly relevant, 1=somewhat relevant)
+
+**Important**: The dataset provides relevance judgments (which documents answer which queries) but does NOT include ground truth answers. We must generate synthetic ground truth offline.
+
+**Selected Test Queries** (5 diverse candidates):
+
+1. **PLAIN-2630**: "Alkylphenol Endocrine Disruptors and Allergies" (5 words, 21 highly relevant docs)
+2. **PLAIN-2660**: "How Long to Detox From Fish Before Pregnancy?" (8 words, 20 highly relevant docs)
+3. **PLAIN-2510**: "Coffee and Artery Function" (4 words, 16 highly relevant docs)
+4. **PLAIN-2430**: "Preventing Brain Loss with B Vitamins?" (6 words, 15 highly relevant docs)
+5. **PLAIN-2690**: "Chronic Headaches and Pork Tapeworms" (5 words, 14 highly relevant docs)
+
+**Ground Truth Generation** (offline, pre-test):
+
+Ground truth answers will be generated offline using a script that:
+1. Loads nfcorpus dataset
+2. For each selected query, extracts top 3-5 highly relevant documents
+3. Uses an LLM (ollama/anthropic) to synthesize a reference answer
+4. Stores ground truth in `tests/rag_evaluation/fixtures/ground_truth.json`
+
+```python
+# tools/generate_rag_ground_truth.py
+async def generate_ground_truth(query: str, relevant_docs: List[dict], llm: LLMProvider) -> str:
+    """Generate synthetic ground truth answer from highly relevant documents."""
+    context = "\n\n".join([
+        f"Document {i+1}:\nTitle: {doc['title']}\n{doc['text']}"
+        for i, doc in enumerate(relevant_docs[:5])
+    ])
+
+    prompt = f"""Based on the following documents, provide a comprehensive answer to this question:
+
+Question: {query}
+
+{context}
+
+Provide a factual, well-structured answer that synthesizes information from the documents.
+Focus on accuracy and completeness."""
+
+    return await llm.generate(prompt, max_tokens=500)
+```
+
+**Dataset Loading at Test Runtime** (in `nfcorpus_test_data` fixture):
+
+1. Download nfcorpus dataset (cached in pytest temp directory)
+2. Load corpus, queries, and qrels (relevance judgments)
+3. Load pre-generated ground truth from `tests/rag_evaluation/fixtures/ground_truth.json`
+4. Upload all corpus documents as Nextcloud notes
+5. Trigger vector sync to index documents
+6. Wait for indexing completion
+7. Return test cases with query, ground truth, and expected doc IDs
+
+**LLM Provider Abstraction**
+
+```python
+# tests/rag_evaluation/llm_providers.py
+class LLMProvider(Protocol):
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str: ...
+
+class OllamaProvider:
+    def __init__(self, base_url: str, model: str):
+        self.base_url = base_url
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        # Use httpx to call Ollama API
+        ...
+
+class AnthropicProvider:
+    def __init__(self, api_key: str, model: str):
+        self.client = anthropic.AsyncAnthropic(api_key=api_key)
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        message = await self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return message.content[0].text
+```
+
+### Consequences
+
+**Positive:**
+
+* **Actionable debugging**: Separate retrieval/generation tests pinpoint failure location
+* **Industry-standard metrics**: Context Recall and Answer Correctness are recognized RAG evaluation metrics
+* **Simple initial implementation**: Binary LLM evaluation (true/false) is straightforward to implement and interpret
+* **Extensible framework**: Easy to add more metrics (faithfulness, relevance) later
+* **Standardized benchmark**: nfcorpus provides objective comparison against published RAG systems
+* **Hybrid evaluation**: Combines efficiency (heuristics for retrieval) with quality (LLM-as-judge for generation)
+* **Provider flexibility**: Supports both local (Ollama) and cloud (Anthropic) LLM evaluation
+
+**Negative:**
+
+* **Medical domain bias**: nfcorpus is medical/biomedical content, may not represent production use cases (personal notes, calendar events, etc.)
+* **Manual test execution**: Tests require external LLM access and are not integrated into CI pipeline
+* **Limited initial coverage**: Starting with only 5 queries provides limited statistical confidence
+* **Evaluation cost**: LLM-as-judge for generation evaluation incurs API costs (Anthropic) or requires local inference (Ollama)
+* **Single metric per component**: Initial scope tests only one metric per component, missing other important quality dimensions
+* **Synthetic ground truth**: Ground truth answers are LLM-generated, not human-validated, which may introduce evaluation bias
+* **Large corpus upload**: Uploading 3,633 documents at test runtime may be slow; caching strategy needed
+
+**Future Work:**
+
+* Expand to 50-100 queries for statistical significance
+* Add custom test dataset with production-representative documents (meeting notes, task lists, etc.)
+* Implement additional metrics (faithfulness, context relevance, answer relevance)
+* Create automated benchmarking dashboard to track metric trends
+* Test multi-hop reasoning (synthesis questions requiring multiple documents)
+* Evaluate out-of-scope handling ("I don't know" responses)
@@ -0,0 +1,241 @@
+# ADR-014: Replace Custom Keyword Search with BM25 Hybrid Search via Qdrant
+
+**Date:** 2025-11-16
+
+**Status:** Implemented
+
+---
+
+### 1. Context
+
+Our RAG application currently employs two separate retrieval mechanisms:
+1.  **Dense (Semantic) Search:** Using vector embeddings stored in our Qdrant database to find semantically similar context.
+2.  **Keyword Search:** A custom-built fuzzy/character-based search to match specific keywords, acronyms, and product codes that semantic search often misses.
+
+This dual-system approach has several drawbacks:
+* **Poor Relevance:** Our current keyword search is basic (e.g., `LIKE` queries or simple fuzzy matching). It is not as effective as modern full-text search algorithms like BM25.
+* **Clunky Fusion:** We lack a robust, principled method to combine the results from the two systems. This leads to disjointed logic in the application layer and suboptimal context being passed to the LLM.
+* **Architectural Complexity:** We must maintain two separate search pathways (one to Qdrant, one to the keyword search mechanism), increasing code complexity and maintenance overhead.
+
+Our vector database, **Qdrant**, natively supports **hybrid search** by combining dense vectors with BM25-based **sparse vectors** in a single collection.
+
+### 2. Decision
+
+We will **deprecate and remove** the existing custom keyword/fuzzy search functionality.
+
+We will **replace it by implementing native hybrid search within Qdrant**. This involves:
+1.  **Modifying the Qdrant Collection:** Updating our collection to support a named sparse vector index configured for BM25.
+2.  **Updating the Ingestion Pipeline:** For every document chunk, we will generate and upsert *both*:
+    * Its **dense vector** (from our existing embedding model).
+    * Its **sparse vector** (generated using a BM25-compatible model, e.g., `Qdrant/bm25` from `fastembed`).
+3.  **Refactoring Retrieval Logic:** All retrieval calls will be consolidated into a single Qdrant query using the `query_points` endpoint. This query will use the `prefetch` parameter to execute both dense and sparse searches, and Qdrant's built-in **Reciprocal Rank Fusion (RRF)** to automatically merge the results into a single, relevance-ranked list.
+4.  **Backfilling:** A one-time migration script will be created to generate and add sparse vectors for all existing documents in the Qdrant collection.
+
+---
+
+### 3. Considered Options
+
+#### Option 1: Native Qdrant Hybrid Search (Chosen)
+* Use Qdrant's built-in sparse vector and RRF capabilities.
+* **Pros:**
+    * **Consolidated Architecture:** Manages both dense and sparse indexes in one database.
+    * **No Data Sync Issues:** Updates are atomic. A single `upsert` updates both representations.
+    * **Built-in Fusion:** RRF is handled natively and efficiently by the database.
+    * **Superior Relevance:** Replaces our brittle custom search with the industry-standard BM25.
+* **Cons:**
+    * Requires a one-time data backfill which may be time-consuming.
+    * Adds a new step (sparse vector generation) to the ingestion pipeline.
+
+#### Option 2: External Full-Text Search (e.g., Elasticsearch)
+* Keep Qdrant for dense search and add a separate Elasticsearch/OpenSearch cluster for BM25.
+* **Pros:**
+    * Provides a very powerful, dedicated full-text search engine.
+* **Cons:**
+    * **High Complexity:** Introduces a new, stateful service to deploy, manage, and scale.
+    * **Data Sync Nightmare:** We would be responsible for ensuring that the document IDs and content in Qdrant and Elasticsearch are always perfectly synchronized. This is a major source of bugs.
+    * **Manual Fusion:** The application would have to query both systems and perform RRF manually.
+
+#### Option 3: Keep Current System
+* Make no changes.
+* **Pros:**
+    * No engineering effort required.
+* **Cons:**
+    * Fails to address the known relevance and architectural problems.
+    * Our RAG application's performance will remain suboptimal, especially for keyword-sensitive queries.
+
+---
+
+### 4. Rationale
+
+**Option 1 is the clear winner.** It directly solves our primary problem (poor keyword matching) by adopting the industry-standard BM25.
+
+Critically, it achieves this while **simplifying** our overall architecture, not complicating it. By leveraging features already present in our existing database (Qdrant), we avoid the massive operational and synchronization overhead of adding a second search system (Option 2).
+
+This decision consolidates our retrieval logic, eliminates the data consistency problem, and moves the complex fusion logic (RRF) from the application layer into the database, where it can be performed more efficiently.
+
+### 5. Consequences
+
+**New Work:**
+* **Ingestion:** The data ingestion pipeline must be updated to add the `fastembed` library (or similar), generate sparse vectors, and upsert them to the new named vector field in Qdrant.
+* **Retrieval:** The application's retrieval service must be refactored to use the `query_points` endpoint with `prefetch` and `fusion=models.Fusion.RRF`.
+* **Migration:** A one-time backfill script must be written and executed to add sparse vectors for all existing documents.
+* **Infrastructure:** The Qdrant collection schema must be updated (or re-created) to add the `sparse_vectors_config`.
+
+**Positive:**
+* **Improved Accuracy:** Retrieval will be significantly more accurate, handling both semantic and keyword queries robustly.
+* **Simplified Code:** The application's retrieval logic will be cleaner and simpler, with one endpoint instead of two.
+* **Reduced Maintenance:** We will remove the custom fuzzy-search code, which is brittle and difficult to maintain.
+
+**Negative:**
+* The data backfill process will require careful management to avoid downtime.
+* Ingestion time will slightly increase due to the extra step of sparse vector generation. This is considered a negligible trade-off for the gains in relevance.
+
+---
+
+### 6. Implementation Notes
+
+**Implementation completed on 2025-11-16**
+
+**Key Changes:**
+
+1. **Dependencies** (pyproject.toml:25):
+   - Added `fastembed>=0.4.2` for BM25 sparse vector embeddings
+   - Adjusted `pillow` version constraint to be compatible with fastembed
+
+2. **Qdrant Collection Schema** (nextcloud_mcp_server/vector/qdrant_client.py:113-128):
+   - Updated to named vectors: `{"dense": VectorParams(...), "sparse": SparseVectorParams(...)}`
+   - Added sparse vector configuration with BM25 index
+   - Maintains backward compatibility with existing collections (detects legacy schema)
+
+3. **BM25 Embedding Provider** (nextcloud_mcp_server/embedding/bm25_provider.py):
+   - Created `BM25SparseEmbeddingProvider` using FastEmbed's `Qdrant/bm25` model
+   - Implements `encode()` and `encode_batch()` methods
+   - Returns sparse vectors as `{indices: list[int], values: list[float]}` format
+
+4. **Document Indexing Pipeline** (nextcloud_mcp_server/vector/processor.py:229-255):
+   - Generates both dense (semantic) and sparse (BM25) embeddings for each document chunk
+   - Updates `PointStruct` to use named vectors: `vector={"dense": ..., "sparse": ...}`
+   - Maintains same chunking strategy (512 words, 50-word overlap)
+
+5. **BM25 Hybrid Search Algorithm** (nextcloud_mcp_server/search/bm25_hybrid.py):
+   - Implements `BM25HybridSearchAlgorithm` using Qdrant's native RRF fusion
+   - Uses `prefetch` parameter for parallel dense + sparse search
+   - Applies `fusion=models.Fusion.RRF` for automatic result merging
+   - Maintains same deduplication and filtering logic as semantic search
+
+6. **MCP Tool Updates** (nextcloud_mcp_server/server/semantic.py:39-68):
+   - Simplified `nc_semantic_search()` to use BM25 hybrid only
+   - Removed `algorithm`, `semantic_weight`, `keyword_weight`, `fuzzy_weight` parameters
+   - Updated default `score_threshold=0.0` for RRF scoring
+   - Returns `search_method="bm25_hybrid"` in responses
+
+7. **Legacy Algorithm Removal**:
+   - Deleted `nextcloud_mcp_server/search/keyword.py` (278 lines)
+   - Deleted `nextcloud_mcp_server/search/fuzzy.py` (220 lines)
+   - Deleted `nextcloud_mcp_server/search/hybrid.py` (238 lines - custom RRF)
+   - Updated `nextcloud_mcp_server/search/__init__.py` to export only BM25 hybrid
+
+**Migration Strategy:**
+- No migration required (vector sync feature is experimental)
+- New documents automatically indexed with both dense + sparse vectors
+- Collection re-creation on first startup with updated schema
+
+**Test Results:**
+- All unit tests passing (118 passed)
+- All integration tests passing (7 semantic search tests)
+- Code formatting verified with ruff
+
+**Benefits Realized:**
+- ✅ Consolidated architecture (single Qdrant database for both dense + sparse)
+- ✅ Native fusion algorithms (database-level, more efficient)
+- ✅ Industry-standard BM25 (replaces custom keyword search)
+- ✅ Simplified codebase (removed 736 lines of legacy code)
+- ✅ Better relevance (handles both semantic and keyword queries)
+- ✅ Configurable fusion methods (RRF and DBSF)
+
+---
+
+### 7. Fusion Algorithm Options
+
+**Update: 2025-11-16**
+
+The BM25 hybrid search now supports two fusion algorithms for combining dense (semantic) and sparse (BM25) search results:
+
+#### Reciprocal Rank Fusion (RRF)
+
+**Default fusion method.** RRF is a widely-used, well-established algorithm that combines rankings from multiple retrieval systems using the reciprocal rank formula:
+
+```
+RRF(doc) = Σ 1/(k + rank_i(doc))
+```
+
+where `k` is a constant (typically 60) and `rank_i(doc)` is the rank of the document in retrieval system `i`.
+
+**Characteristics:**
+- ✅ **General-purpose**: Works well across diverse query types and document collections
+- ✅ **Rank-based**: Focuses on relative rankings rather than absolute scores
+- ✅ **Established**: Well-tested, documented, and understood in IR literature
+- ✅ **Robust**: Less sensitive to score distribution differences between systems
+
+**When to use RRF:**
+- Default choice for most use cases
+- When you have mixed query types (semantic + keyword)
+- When retrieval systems have very different score ranges
+- When you want predictable, well-understood behavior
+
+#### Distribution-Based Score Fusion (DBSF)
+
+**Alternative fusion method.** DBSF normalizes scores from each retrieval system using distribution statistics before combining them:
+
+1. **Normalization**: For each query, calculates mean (μ) and standard deviation (σ) of scores
+2. **Outlier handling**: Uses μ ± 3σ as normalization bounds
+3. **Fusion**: Sums normalized scores across systems
+
+**Characteristics:**
+- ✅ **Score-aware**: Uses actual relevance scores, not just rankings
+- ✅ **Statistical**: Normalizes based on score distribution properties
+- ⚠️ **Experimental**: Newer algorithm, less battle-tested than RRF
+- ⚠️ **Sensitive**: May behave differently depending on score distributions
+
+**When to use DBSF:**
+- When retrieval systems have vastly different score ranges that RRF doesn't balance well
+- When you want to experiment with score-based (vs rank-based) fusion
+- When statistical normalization better matches your use case
+- For A/B testing against RRF to measure retrieval quality improvements
+
+#### Configuration
+
+Both fusion algorithms are exposed via the `fusion` parameter in MCP tools:
+
+```python
+# Use RRF (default)
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf"  # Can be omitted, RRF is default
+)
+
+# Use DBSF
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="dbsf"
+)
+```
+
+The `nc_semantic_search_answer` tool also supports the `fusion` parameter and passes it through to the underlying search.
+
+#### Future: Configurable Weights
+
+**Current limitation**: Neither RRF nor DBSF currently supports per-system weights (e.g., 0.8 for semantic, 0.2 for BM25). This is a Qdrant platform limitation tracked in [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067).
+
+When Qdrant adds weight support, the `fusion` parameter can be extended to accept weight configurations:
+
+```python
+# Hypothetical future API
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf",
+    fusion_weights={"dense": 0.7, "sparse": 0.3}  # Not yet implemented
+)
+```
+
+**Recommendation**: Start with RRF (default). If you encounter cases where keyword matches are under- or over-weighted, experiment with DBSF. Monitor [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067) for configurable weight support.
@@ -0,0 +1,380 @@
+# ADR-015: Unified Provider Architecture for Embeddings and Text Generation
+
+**Status:** Accepted
+**Date:** 2025-11-16
+**Deciders:** Development Team
+**Related:** ADR-003 (Vector Database), ADR-008 (MCP Sampling), ADR-013 (RAG Evaluation)
+
+## Context
+
+Prior to this refactoring, the codebase had two separate provider systems:
+
+1. **Embedding Providers** (`nextcloud_mcp_server/embedding/`)
+   - Used `EmbeddingProvider` ABC with methods: `embed()`, `embed_batch()`, `get_dimension()`
+   - Had auto-detection via `EmbeddingService._detect_provider()`
+   - Used for semantic search and vector indexing (production)
+
+2. **LLM Providers** (`tests/rag_evaluation/llm_providers.py`)
+   - Used `LLMProvider` Protocol with method: `generate()`
+   - Had separate factory function `create_llm_provider()`
+   - Used only for RAG evaluation tests (not production)
+
+This fragmentation created several problems:
+
+### Problems with Dual Provider Systems
+
+1. **Code Duplication**
+   - Ollama configuration appeared in both `embedding/service.py` and `tests/rag_evaluation/llm_providers.py`
+   - Similar provider detection logic in multiple places
+   - Separate singleton patterns for each system
+
+2. **Limited Extensibility**
+   - Hard-coded provider detection in `EmbeddingService._detect_provider()`
+   - No support for providers that offer both capabilities (like Bedrock)
+   - Adding new providers required modifying multiple files
+
+3. **Inconsistent Patterns**
+   - BM25 provider didn't follow `EmbeddingProvider` ABC
+   - Different method names across providers (`embed` vs `encode`)
+   - ABC vs Protocol for type checking
+
+4. **Difficult Scaling**
+   - Adding Amazon Bedrock (our third provider) would exacerbate all issues
+   - No clear path for future providers (OpenAI, Cohere, etc.)
+
+### Amazon Bedrock Requirements
+
+Bedrock naturally supports **both** embeddings and text generation:
+- **Embeddings**: `amazon.titan-embed-text-v1/v2`, `cohere.embed-*`
+- **Text Generation**: `anthropic.claude-*`, `meta.llama3-*`, `amazon.titan-text-*`
+- **Unified API**: Single `invoke_model()` method via bedrock-runtime
+
+This made it the perfect opportunity to establish a unified provider architecture.
+
+## Decision
+
+We refactored the provider infrastructure to use a **unified Provider ABC** with optional capabilities:
+
+### 1. Unified Provider Interface
+
+**New Structure:**
+```
+nextcloud_mcp_server/providers/
+├── __init__.py
+├── base.py              # Provider ABC with optional capabilities
+├── registry.py          # Auto-detection and factory
+├── ollama.py            # Supports both embedding + generation
+├── anthropic.py         # Generation only
+├── bedrock.py           # Supports both embedding + generation
+└── simple.py            # Embedding only (testing fallback)
+```
+
+**Base Class (`providers/base.py`):**
+```python
+class Provider(ABC):
+    @property
+    @abstractmethod
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        pass
+
+    @property
+    @abstractmethod
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        pass
+
+    @abstractmethod
+    async def embed(self, text: str) -> list[float]:
+        """Generate embedding (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Generate batch embeddings (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    def get_dimension(self) -> int:
+        """Get embedding dimension (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """Generate text (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def close(self) -> None:
+        """Close provider and release resources."""
+        pass
+```
+
+### 2. Provider Registry
+
+**Auto-Detection Priority** (`providers/registry.py`):
+```python
+class ProviderRegistry:
+    @staticmethod
+    def create_provider() -> Provider:
+        # 1. Bedrock (AWS_REGION or BEDROCK_*_MODEL)
+        # 2. Ollama (OLLAMA_BASE_URL)
+        # 3. Simple (fallback)
+```
+
+**Environment Variables:**
+
+**Bedrock:**
+- `AWS_REGION`: AWS region (e.g., "us-east-1")
+- `AWS_ACCESS_KEY_ID`: AWS access key (optional, uses credential chain)
+- `AWS_SECRET_ACCESS_KEY`: AWS secret key (optional)
+- `BEDROCK_EMBEDDING_MODEL`: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL`: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+
+**Ollama:**
+- `OLLAMA_BASE_URL`: Ollama API base URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL`: Model for embeddings (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL`: Model for text generation (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL`: Verify SSL certificates (default: "true")
+
+**Simple (fallback; no configuration required):**
+- `SIMPLE_EMBEDDING_DIMENSION`: Embedding dimension (default: 384)
+
+### 3. Backward Compatibility
+
+**Old Code Continues to Work:**
+```python
+# Old way (still works)
+from nextcloud_mcp_server.embedding import get_embedding_service
+
+service = get_embedding_service()  # Returns singleton Provider
+embeddings = await service.embed_batch(texts)
+```
+
+**New Way (recommended):**
+```python
+# New way (cleaner)
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Returns singleton Provider
+embeddings = await provider.embed_batch(texts)
+
+# Can also use generation if provider supports it
+if provider.supports_generation:
+    text = await provider.generate("prompt")
+```
+
+**Migration Path:**
+- `embedding/service.py` now wraps `providers.get_provider()` for compatibility
+- `tests/rag_evaluation/llm_providers.py` now uses unified providers
+- Old imports still work, marked as deprecated in docstrings
+
+### 4. Amazon Bedrock Implementation
+
+**Features:**
+- Supports both embeddings and text generation
+- Model-specific request/response handling for:
+  - Titan Embed (amazon.titan-embed-text-*)
+  - Cohere Embed (cohere.embed-*)
+  - Claude (anthropic.claude-*)
+  - Llama (meta.llama3-*)
+  - Titan Text (amazon.titan-text-*)
+  - Mistral (mistral.*)
+- Uses boto3 bedrock-runtime client
+- Graceful degradation if boto3 not installed
+- Async implementation matching existing patterns
+
+**Model-Specific Handling:**
+```python
+# Bedrock embedding request (Titan)
+{"inputText": text}
+
+# Bedrock generation request (Claude)
+{
+    "anthropic_version": "bedrock-2023-05-31",
+    "max_tokens": max_tokens,
+    "temperature": 0.7,
+    "messages": [{"role": "user", "content": prompt}]
+}
+```
+
+## Consequences
+
+### Positive
+
+1. **Sustainable Provider Additions**
+   - New providers only need to implement `Provider` ABC
+   - Auto-detection via environment variables
+   - No modifications to existing code required
+
+2. **Code Consolidation**
+   - Single provider interface instead of two
+   - Unified configuration pattern
+   - Eliminated duplication
+
+3. **Better Extensibility**
+   - Providers can support one or both capabilities
+   - Clear capability detection via properties
+   - Registry pattern simplifies auto-detection
+
+4. **Improved Testing**
+   - RAG evaluation can use any provider (Ollama, Anthropic, Bedrock)
+   - Comprehensive unit tests for all providers
+   - Mocked boto3 tests for Bedrock
+
+5. **Production-Ready Bedrock Support**
+   - Full embedding and generation support
+   - Multiple model families supported
+   - AWS credential chain integration
+
+### Neutral
+
+1. **Optional Boto3 Dependency**
+   - boto3 is a dev dependency only (not required for core functionality)
+   - Bedrock provider gracefully fails if boto3 not installed
+   - Users who want Bedrock must `pip install boto3`
+
+2. **Capability Properties**
+   - All providers must implement capability properties
+   - Methods raise `NotImplementedError` if capability not supported
+   - Clear error messages guide users to alternatives
+
+### Negative
+
+1. **Migration Effort**
+   - Existing code should eventually migrate to the new imports (optional for now; the old imports remain backward compatible)
+   - Documentation needs updating
+   - Users must learn new environment variables
+
+2. **Increased Complexity**
+   - Provider base class has more methods (embedding + generation)
+   - More environment variables to configure
+   - Capability detection adds runtime checks
+
+## Implementation
+
+### Files Created
+
+**New Provider Infrastructure:**
+- `nextcloud_mcp_server/providers/__init__.py`
+- `nextcloud_mcp_server/providers/base.py`
+- `nextcloud_mcp_server/providers/registry.py`
+- `nextcloud_mcp_server/providers/ollama.py`
+- `nextcloud_mcp_server/providers/anthropic.py`
+- `nextcloud_mcp_server/providers/bedrock.py`
+- `nextcloud_mcp_server/providers/simple.py`
+
+**Tests:**
+- `tests/unit/providers/__init__.py`
+- `tests/unit/providers/test_bedrock.py` (9 unit tests)
+
+**Documentation:**
+- `docs/ADR-015-unified-provider-architecture.md` (this file)
+
+### Files Modified
+
+**Backward Compatibility:**
+- `nextcloud_mcp_server/embedding/service.py` - Now wraps `get_provider()`
+- `tests/rag_evaluation/llm_providers.py` - Uses unified providers
+
+**Dependencies:**
+- `pyproject.toml` - Added `boto3>=1.35.0` to dev dependencies
+
+### Testing Results
+
+- **Unit Tests:** 127 passed (including 9 new Bedrock tests)
+- **Type Checking:** All checks passed (ty)
+- **Linting:** All checks passed (ruff)
+- **Backward Compatibility:** Verified - existing embedding tests work
+
+## Alternatives Considered
+
+### Alternative 1: Keep Separate Provider Systems
+
+**Pros:**
+- No refactoring needed
+- Simpler short-term
+
+**Cons:**
+- Bedrock would need to be implemented twice
+- Continued code duplication
+- No long-term scalability
+
+**Decision:** Rejected - technical debt would continue to grow
+
+### Alternative 2: Separate Embedding and Generation Providers
+
+Use composition instead of unified interface:
+```python
+class CombinedProvider:
+    def __init__(self, embedding: EmbeddingProvider, generation: LLMProvider):
+        self.embedding = embedding
+        self.generation = generation
+```
+
+**Pros:**
+- Clearer separation of concerns
+- Simpler individual providers
+
+**Cons:**
+- Bedrock and Ollama naturally do both - artificial separation
+- More complex configuration (two providers to configure)
+- More boilerplate code
+
+**Decision:** Rejected - unified interface better matches provider capabilities
+
+### Alternative 3: Plugin System
+
+Dynamic provider registration via entry points:
+```python
+# setup.py
+entry_points={
+    'nextcloud_mcp.providers': [
+        'ollama = nextcloud_mcp_server.providers.ollama:OllamaProvider',
+        'bedrock = nextcloud_mcp_server.providers.bedrock:BedrockProvider',
+    ]
+}
+```
+
+**Pros:**
+- Most extensible
+- Third-party providers possible
+
+**Cons:**
+- Over-engineered for current needs
+- Added complexity
+- No immediate benefit
+
+**Decision:** Deferred - can add later if needed
+
+## Future Work
+
+1. **Additional Providers**
+   - OpenAI (embeddings + generation)
+   - Cohere (embeddings + generation)
+   - Google Vertex AI
+   - Azure OpenAI
+
+2. **Provider Features**
+   - Streaming generation support
+   - Batch API optimization (when available)
+   - Model-specific optimizations
+   - Cost tracking and metrics
+
+3. **Configuration Improvements**
+   - Provider profiles (development, production)
+   - Model aliasing (e.g., "small", "large")
+   - Fallback provider chains
+
+4. **Testing**
+   - Integration tests with real Bedrock endpoints
+   - Performance benchmarking across providers
+   - Cost comparison analysis
+
+## References
+
+- [boto3 Bedrock Runtime Documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Amazon Bedrock User Guide](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html)
+- ADR-003: Vector Database and Semantic Search
+- ADR-008: MCP Sampling for Semantic Search
+- ADR-013: RAG Evaluation Framework
@@ -0,0 +1,492 @@
+# ADR-016: Smithery Stateless Deployment for Multi-User Public Nextcloud Instances
+
+**Status:** Proposed
+**Date:** 2025-01-22
+**Deciders:** Development Team
+**Related:** ADR-004 (OAuth), ADR-007 (Background Vector Sync), ADR-015 (Unified Provider)
+
+## Context
+
+[Smithery](https://smithery.ai) is a hosting platform and marketplace for MCP servers that provides:
+
+- **Discovery**: Marketplace listing for MCP servers
+- **Hosting**: Containerized deployment with auto-scaling
+- **Authentication UI**: OAuth flow presentation for users
+- **Session Configuration**: Per-user settings passed via URL parameters
+- **Observability**: Usage logs and monitoring
+
+### Current Architecture Limitations
+
+The current nextcloud-mcp-server architecture assumes a **self-hosted deployment** with:
+
+1. **Persistent Infrastructure**
+   - Qdrant vector database for semantic search
+   - Background sync worker for content indexing
+   - Refresh token storage for offline access
+
+2. **Single-Tenant Configuration**
+   - Environment variables configure one Nextcloud instance
+   - `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`
+   - Or OAuth with a single IdP
+
+3. **Stateful Operations**
+   - Vector sync maintains index state across requests
+   - Token storage persists between sessions
+
+### Smithery Hosting Constraints
+
+Smithery-hosted containers are **stateless by design**:
+
+- No persistent storage between requests
+- No background workers or cron jobs
+- No databases (Qdrant, Redis, etc.)
+- Containers may be recycled at any time
+- Configuration passed per-session via URL parameters
+
+### Opportunity
+
+Many users have **publicly accessible Nextcloud instances** and want to:
+
+1. Try the MCP server without self-hosting infrastructure
+2. Connect multiple users to different Nextcloud instances
+3. Use basic Nextcloud tools without semantic search
+4. Benefit from Smithery's discovery and OAuth UI
+
+## Decision
+
+Implement a **stateless deployment mode** for Smithery that:
+
+1. **Disables stateful features** (vector sync, semantic search)
+2. **Creates clients per-session** from Smithery configuration
+3. **Supports multiple Nextcloud instances** via session config
+4. **Provides a useful subset of tools** that work without infrastructure
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                    Smithery-Hosted Stateless Mode                        │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                          │
+│  MCP Client                    Smithery                                  │
+│  (Cursor, Claude)              Infrastructure                            │
+│        │                            │                                    │
+│        │ 1. Connect                 │                                    │
+│        ├───────────────────────────►│                                    │
+│        │                            │                                    │
+│        │ 2. Config UI               │                                    │
+│        │◄───────────────────────────┤  User enters:                      │
+│        │    (Smithery presents)     │  - nextcloud_url                   │
+│        │                            │  - auth_mode (app_password/oauth)  │
+│        │                            │  - credentials                     │
+│        │ 3. Tool call               │                                    │
+│        ├───────────────────────────►│                                    │
+│        │    + session config        │                                    │
+│        │                            │                                    │
+│        │                    ┌───────┴───────┐                            │
+│        │                    │  MCP Server   │                            │
+│        │                    │  Container    │                            │
+│        │                    │               │                            │
+│        │                    │ 4. Create     │                            │
+│        │                    │    client     │                            │
+│        │                    │    from       │                            │
+│        │                    │    config     │                            │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │                    │ 5. Call       │                            │
+│        │                    │    Nextcloud  │───────► User's Nextcloud   │
+│        │                    │    API        │         Instance           │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │ 6. Response        │ Return result │                            │
+│        │◄───────────────────┤               │                            │
+│        │                    └───────────────┘                            │
+│                                                                          │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+### Session Configuration Schema
+
+```python
+from pydantic import BaseModel, Field
+
+class SmitheryConfigSchema(BaseModel):
+    """Configuration schema for Smithery session."""
+
+    # Required: Nextcloud instance
+    nextcloud_url: str = Field(
+        ...,
+        description="Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+    )
+
+    # Authentication mode
+    auth_mode: str = Field(
+        "app_password",
+        description="Authentication method: 'app_password' or 'oauth'"
+    )
+
+    # App Password authentication (recommended for Smithery)
+    username: str | None = Field(
+        None,
+        description="Nextcloud username (required for app_password auth)"
+    )
+    app_password: str | None = Field(
+        None,
+        description="Nextcloud app password (Settings → Security → App passwords)"
+    )
+
+    # OAuth authentication (advanced)
+    # When auth_mode='oauth', Smithery handles the OAuth flow
+    # and passes the access token automatically
+```
+
+### Feature Matrix
+
+| Feature | Self-Hosted | Smithery Stateless |
+|---------|-------------|-------------------|
+| **Notes** | | |
+| List/Search notes | ✓ | ✓ |
+| Get/Create/Update notes | ✓ | ✓ |
+| Semantic search | ✓ | ✗ |
+| **Calendar** | | |
+| List calendars | ✓ | ✓ |
+| Get/Create events | ✓ | ✓ |
+| **Contacts** | | |
+| List address books | ✓ | ✓ |
+| Search/Get contacts | ✓ | ✓ |
+| **Files (WebDAV)** | | |
+| List/Download files | ✓ | ✓ |
+| Upload files | ✓ | ✓ |
+| Search files | ✓ | ✓ (keyword only) |
+| **Deck** | | |
+| List boards/cards | ✓ | ✓ |
+| Create/Update cards | ✓ | ✓ |
+| **Tables** | | |
+| List/Query tables | ✓ | ✓ |
+| Create/Update rows | ✓ | ✓ |
+| **Cookbook** | | |
+| List/Get recipes | ✓ | ✓ |
+| **Semantic Search** | | |
+| Vector search | ✓ | ✗ |
+| RAG answers | ✓ | ✗ |
+| **Background Sync** | | |
+| Auto-indexing | ✓ | ✗ |
+| Webhook sync | ✓ | ✗ |
+| **Admin UI (`/app`)** | | |
+| Vector sync status | ✓ | ✗ |
+| Vector visualization | ✓ | ✗ |
+| Webhook management | ✓ | ✗ |
+| Session management | ✓ | ✗ |
+
+### Implementation
+
+#### 1. Deployment Mode Detection
+
+```python
+# nextcloud_mcp_server/config.py
+
+class DeploymentMode(Enum):
+    SELF_HOSTED = "self_hosted"      # Full features, env-based config
+    SMITHERY_STATELESS = "smithery"  # Stateless, session-based config
+
+def get_deployment_mode() -> DeploymentMode:
+    """Detect deployment mode from environment."""
+    if os.getenv("SMITHERY_DEPLOYMENT") == "true":
+        return DeploymentMode.SMITHERY_STATELESS
+    return DeploymentMode.SELF_HOSTED
+```
+
+#### 2. Session-Based Client Factory
+
+```python
+# nextcloud_mcp_server/context.py
+
+async def get_client(ctx: Context) -> NextcloudClient:
+    """Get NextcloudClient - from session config or environment."""
+
+    mode = get_deployment_mode()
+
+    if mode == DeploymentMode.SMITHERY_STATELESS:
+        # Create client from Smithery session config
+        config = ctx.session_config
+        if not config:
+            raise McpError("Session configuration required")
+
+        return NextcloudClient(
+            base_url=config.nextcloud_url,
+            username=config.username,
+            password=config.app_password,
+        )
+    else:
+        # Existing behavior: from environment or OAuth context
+        return await _get_client_from_context(ctx)
+```
+
+#### 3. Conditional Tool Registration
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_mcp_server(mode: DeploymentMode) -> FastMCP:
+    """Create MCP server with mode-appropriate tools."""
+
+    mcp = FastMCP("Nextcloud MCP")
+
+    # Always register core tools
+    configure_notes_tools(mcp)
+    configure_calendar_tools(mcp)
+    configure_contacts_tools(mcp)
+    configure_webdav_tools(mcp)
+    configure_deck_tools(mcp)
+    configure_tables_tools(mcp)
+    configure_cookbook_tools(mcp)
+
+    # Only register stateful tools in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        configure_semantic_tools(mcp)  # Requires Qdrant
+        register_oauth_tools(mcp)       # Requires token storage
+
+    return mcp
+```
+
+#### 4. Exclude Admin UI Routes
+
+The `/app` admin UI should **not be installed** in Smithery mode because:
+
+- **Vector sync status** - No vector sync in stateless mode
+- **Vector visualization** - No Qdrant to visualize
+- **Webhook management** - No webhook sync without background workers
+- **Session management** - No persistent sessions to manage
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_app(mode: DeploymentMode) -> Starlette:
+    """Create Starlette app with mode-appropriate routes."""
+
+    routes = [
+        Route("/health/live", health_live, methods=["GET"]),
+        Route("/health/ready", health_ready, methods=["GET"]),
+    ]
+
+    # Only mount admin UI in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        browser_app = create_browser_app()
+        routes.append(
+            Route("/app", lambda r: RedirectResponse("/app/", status_code=307))
+        )
+        routes.append(Mount("/app", app=browser_app))
+        logger.info("Admin UI mounted at /app")
+    else:
+        logger.info("Admin UI disabled in Smithery stateless mode")
+
+    # Mount FastMCP at root
+    mcp_app = create_mcp_server(mode).streamable_http_app()
+    routes.append(Mount("/", app=mcp_app))
+
+    return Starlette(routes=routes, lifespan=starlette_lifespan)
+```
+
+**Endpoints by Mode:**
+
+| Endpoint | Self-Hosted | Smithery |
+|----------|-------------|----------|
+| `/mcp` | ✓ | ✓ |
+| `/health/live` | ✓ | ✓ |
+| `/health/ready` | ✓ | ✓ |
+| `/.well-known/mcp-config` | ✓ | ✓ |
+| `/app` | ✓ | ✗ |
+| `/app/vector-sync/status` | ✓ | ✗ |
+| `/app/vector-viz` | ✓ | ✗ |
+| `/app/webhooks` | ✓ | ✗ |
+
+#### 5. Smithery Integration Files
+
+**smithery.yaml:**
+```yaml
+runtime: "container"
+build:
+  dockerfile: "Dockerfile.smithery"
+  dockerBuildPath: "."
+startCommand:
+  type: "http"
+  configSchema:
+    type: "object"
+    required: ["nextcloud_url", "username", "app_password"]
+    properties:
+      nextcloud_url:
+        type: "string"
+        title: "Nextcloud URL"
+        description: "Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+      username:
+        type: "string"
+        title: "Username"
+        description: "Your Nextcloud username"
+      app_password:
+        type: "string"
+        title: "App Password"
+        description: "Generate at Settings → Security → App passwords"
+  exampleConfig:
+    nextcloud_url: "https://cloud.example.com"
+    username: "alice"
+    app_password: "xxxxx-xxxxx-xxxxx-xxxxx-xxxxx"
+```
+
+**Dockerfile.smithery:**
+```dockerfile
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
+
+# Copy project files
+COPY pyproject.toml uv.lock ./
+COPY nextcloud_mcp_server ./nextcloud_mcp_server
+
+# Install dependencies (without vector/semantic extras)
+RUN uv sync --frozen --no-dev
+
+# Set Smithery mode
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081
+EXPOSE 8081
+
+CMD ["uv", "run", "python", "-m", "nextcloud_mcp_server.smithery_main"]
+```
+
+**nextcloud_mcp_server/smithery_main.py:**
+```python
+"""Smithery-specific entrypoint for stateless deployment."""
+
+import os
+import uvicorn
+from starlette.middleware.cors import CORSMiddleware
+
+from nextcloud_mcp_server.app import create_mcp_server
+from nextcloud_mcp_server.config import DeploymentMode
+
+def main():
+    # Force stateless mode
+    os.environ["SMITHERY_DEPLOYMENT"] = "true"
+    os.environ["VECTOR_SYNC_ENABLED"] = "false"
+
+    mcp = create_mcp_server(DeploymentMode.SMITHERY_STATELESS)
+    app = mcp.streamable_http_app()
+
+    # Add CORS for browser-based clients
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["GET", "POST", "OPTIONS"],
+        allow_headers=["*"],
+        expose_headers=["mcp-session-id", "mcp-protocol-version"],
+    )
+
+    # Smithery sets PORT environment variable
+    port = int(os.environ.get("PORT", 8081))
+    uvicorn.run(app, host="0.0.0.0", port=port)
+
+if __name__ == "__main__":
+    main()
+```
+
+### Security Considerations
+
+1. **App Passwords over User Passwords**
+   - Smithery config encourages app passwords (revocable, scoped)
+   - Documentation guides users to create dedicated app passwords
+   - App passwords can be revoked without changing main password
+
+2. **HTTPS Required**
+   - `nextcloud_url` must be HTTPS for production use
+   - Validation rejects HTTP URLs in Smithery mode
+
+3. **No Credential Storage**
+   - Credentials exist only for request duration
+   - No server-side persistence of user credentials
+   - Smithery handles secure config transmission
+
+4. **Scope Limitation**
+   - Stateless mode cannot request the `offline_access` scope (no refresh tokens are stored)
+   - No background operations on user's behalf
+   - Clear user expectation: tools work during session only
+
+### Migration Path
+
+Users can start with Smithery stateless mode and migrate to self-hosted:
+
+1. **Try on Smithery** → Basic tools, no setup
+2. **Self-host for semantic search** → Add Qdrant, enable vector sync
+3. **Full deployment** → Background sync, webhooks, multi-user OAuth
+
+## Consequences
+
+### Positive
+
+1. **Lower barrier to entry** - Users can try without infrastructure
+2. **Multi-user support** - Each session connects to different Nextcloud
+3. **Smithery ecosystem** - Discovery, observability, OAuth UI
+4. **Clear feature tiers** - Stateless (simple) vs self-hosted (full)
+
+### Negative
+
+1. **No semantic search** - Key differentiator unavailable on Smithery
+2. **Per-request auth** - Credentials sent with each request
+3. **No offline access** - Cannot perform background operations
+4. **Maintenance burden** - Two deployment modes to support
+
+### Neutral
+
+1. **Feature subset** - May encourage users to self-host for full features
+2. **Documentation needs** - Clear guidance on mode differences required
+
+## Alternatives Considered
+
+### 1. External MCP Only
+
+**Approach:** Only support self-hosted external MCP registration on Smithery.
+
+**Rejected because:**
+- Higher barrier to entry for new users
+- Misses opportunity for Smithery marketplace visibility
+- Users want to try before committing to infrastructure
+
+### 2. Embedded Vector DB (SQLite-vec)
+
+**Approach:** Use SQLite with vector extensions for per-request indexing.
+
+**Rejected because:**
+- No persistence between requests anyway
+- Indexing latency too high for synchronous requests
+- Complexity without benefit in stateless context
+
+### 3. External Vector DB Service
+
+**Approach:** Connect to Pinecone/Weaviate Cloud from Smithery container.
+
+**Rejected because:**
+- Adds external dependency and cost
+- Per-user collections require complex multi-tenancy
+- Sync still impossible without background workers
+
+### 4. Hybrid: Smithery + User's Qdrant
+
+**Approach:** User provides their own Qdrant URL in session config.
+
+**Considered for future:**
+- Could enable semantic search for advanced users
+- Adds complexity to session config
+- Sync still requires external trigger (manual or webhook)
+
+## References
+
+- [Smithery Documentation](https://smithery.ai/docs)
+- [Smithery Session Configuration](https://smithery.ai/docs/build/session-config)
+- [Smithery External MCPs](https://smithery.ai/docs/build/external)
+- [MCP Streamable HTTP Transport](https://modelcontextprotocol.io/docs/concepts/transports)
+- [Nextcloud App Passwords](https://docs.nextcloud.com/server/latest/user_manual/en/session_management.html#app-passwords)
@@ -0,0 +1,338 @@
+# Amazon Bedrock Setup Guide
+
+This guide covers how to configure the Nextcloud MCP Server to use Amazon Bedrock for embeddings and text generation.
+
+## Prerequisites
+
+1. **AWS Account** with access to Amazon Bedrock
+2. **boto3 library** installed: `pip install boto3` or `uv sync --group dev`
+3. **Model Access** - Request access to models in AWS Bedrock console
+
+## Required AWS Permissions
+
+### IAM Policy for Bedrock Access
+
+The AWS IAM user or role needs the following permissions:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockInvokeModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel",
+        "bedrock:InvokeModelWithResponseStream"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+### Minimal Permissions (Production)
+
+For production deployments, restrict to specific models:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockEmbeddings",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v2:0"
+      ]
+    },
+    {
+      "Sid": "BedrockGeneration",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0"
+      ]
+    }
+  ]
+}
+```
+
+### Additional Permissions (Optional)
+
+For advanced use cases:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockListModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:ListFoundationModels",
+        "bedrock:GetFoundationModel"
+      ],
+      "Resource": "*"
+    },
+    {
+      "Sid": "BedrockAsyncInvoke",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModelAsync",
+        "bedrock:GetAsyncInvoke",
+        "bedrock:ListAsyncInvokes"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+## Model Access
+
+Before using Bedrock models, you must request access in the AWS Console:
+
+1. Navigate to **Amazon Bedrock** → **Model access**
+2. Click **Manage model access**
+3. Select models you want to use:
+   - **Embeddings:** Amazon Titan Embed Text, Cohere Embed
+   - **Text Generation:** Anthropic Claude, Meta Llama, Amazon Titan Text
+4. Click **Request model access**
+5. Wait for approval (usually instant for most models)
+
+## Supported Models
+
+### Embedding Models
+
+| Provider | Model ID | Dimensions | Best For |
+|----------|----------|------------|----------|
+| Amazon Titan | `amazon.titan-embed-text-v1` | 1,536 | General purpose |
+| Amazon Titan | `amazon.titan-embed-text-v2:0` | 1,024 | Latest, improved quality |
+| Cohere | `cohere.embed-english-v3` | 1,024 | English text |
+| Cohere | `cohere.embed-multilingual-v3` | 1,024 | Multilingual |
+
+### Text Generation Models
+
+| Provider | Model ID | Context | Best For |
+|----------|----------|---------|----------|
+| Anthropic | `anthropic.claude-3-sonnet-20240229-v1:0` | 200K | Balanced performance |
+| Anthropic | `anthropic.claude-3-haiku-20240307-v1:0` | 200K | Fast, cost-effective |
+| Anthropic | `anthropic.claude-3-opus-20240229-v1:0` | 200K | Highest quality |
+| Meta | `meta.llama3-8b-instruct-v1:0` | 8K | Fast, open-source |
+| Meta | `meta.llama3-70b-instruct-v1:0` | 8K | High quality |
+| Amazon | `amazon.titan-text-express-v1` | 8K | Fast, low cost |
+| Mistral | `mistral.mistral-7b-instruct-v0:2` | 32K | Efficient |
+
+## Configuration
+
+### Environment Variables
+
+**Required:**
+```bash
+AWS_REGION=us-east-1
+```
+
+**Model selection (set at least one):**
+```bash
+# For embeddings
+BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+
+# For text generation (RAG evaluation)
+BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+```
+
+**AWS Credentials (choose one method):**
+
+**Method 1: Environment Variables**
+```bash
+AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
+AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 2: AWS Credentials File** (`~/.aws/credentials`)
+```ini
+[default]
+aws_access_key_id = AKIAIOSFODNN7EXAMPLE
+aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 3: IAM Role** (when running on AWS EC2/ECS/Lambda)
+- No credentials needed, uses instance/task role automatically
+
+### Docker Configuration
+
+Add to your `docker-compose.yml`:
+
+```yaml
+services:
+  mcp:
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+      - BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+```
+
+Or use AWS credentials file volume mount:
+
+```yaml
+services:
+  mcp:
+    volumes:
+      - ~/.aws:/root/.aws:ro
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+```
+
+## Usage Examples
+
+### Embeddings Only
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export AWS_ACCESS_KEY_ID=your-key
+export AWS_SECRET_ACCESS_KEY=your-secret
+
+uv run nextcloud-mcp-server
+```
+
+### Both Embeddings and Generation
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+# For RAG evaluation with Bedrock
+export RAG_EVAL_PROVIDER=bedrock
+export RAG_EVAL_BEDROCK_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+uv run python -m tests.rag_evaluation.evaluate
+```
+
+### Programmatic Usage
+
+```python
+from nextcloud_mcp_server.providers import BedrockProvider
+
+# Embeddings only
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+)
+
+embeddings = await provider.embed_batch(["text1", "text2"])
+
+# Both capabilities
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+    generation_model="anthropic.claude-3-sonnet-20240229-v1:0",
+)
+
+# Generate embeddings
+embedding = await provider.embed("query text")
+
+# Generate text
+response = await provider.generate("Write a summary", max_tokens=500)
+```
+
+## Cost Considerations
+
+### Embedding Costs (as of Jan 2025)
+
+| Model | Price per 1K tokens |
+|-------|---------------------|
+| Titan Embed Text v2 | $0.0001 |
+| Cohere Embed English v3 | $0.0001 |
+
+### Generation Costs (as of Jan 2025)
+
+| Model | Input (per 1K tokens) | Output (per 1K tokens) |
+|-------|----------------------|------------------------|
+| Claude 3 Haiku | $0.00025 | $0.00125 |
+| Claude 3 Sonnet | $0.003 | $0.015 |
+| Claude 3 Opus | $0.015 | $0.075 |
+| Llama 3 8B | $0.0003 | $0.0006 |
+| Titan Text Express | $0.0002 | $0.0006 |
+
+**Note:** Prices vary by region. Check [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/) for current rates.
+
+## Troubleshooting
+
+### Error: boto3 not found (e.g., `ModuleNotFoundError: No module named 'boto3'`)
+
+**Solution:**
+```bash
+uv sync --group dev  # Installs boto3
+```
+
+### Error: "AccessDeniedException"
+
+**Causes:**
+1. IAM permissions missing
+2. Model access not requested
+3. Wrong AWS region
+
+**Solution:**
+1. Verify IAM policy includes `bedrock:InvokeModel`
+2. Request model access in Bedrock console
+3. Check model is available in your region
+
+### Error: "ResourceNotFoundException"
+
+**Cause:** Invalid model ID or model not available in region
+
+**Solution:**
+- Verify model ID matches exactly (case-sensitive)
+- Check model availability in your AWS region
+- Use `aws bedrock list-foundation-models` to see available models
+
+### Error: "ThrottlingException"
+
+**Cause:** Rate limit exceeded
+
+**Solution:**
+- Reduce request rate
+- Request quota increase via AWS Support
+- Use batch operations where possible
+
+## Security Best Practices
+
+1. **Use IAM Roles** when running on AWS infrastructure
+2. **Rotate Access Keys** regularly if using IAM users
+3. **Restrict Permissions** to only required models
+4. **Enable CloudTrail** for audit logging
+5. **Use AWS Secrets Manager** for credential management
+6. **Monitor Costs** with AWS Cost Explorer and Budgets
+
+## Regional Availability
+
+Amazon Bedrock is available in multiple AWS regions, including:
+- **US East (N. Virginia)**: `us-east-1` ✅ Most models
+- **US West (Oregon)**: `us-west-2` ✅ Most models
+- **Asia Pacific (Singapore)**: `ap-southeast-1`
+- **Asia Pacific (Tokyo)**: `ap-northeast-1`
+- **Europe (Frankfurt)**: `eu-central-1`
+
+**Note:** Model availability varies by region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) for current availability.
+
+## References
+
+- [AWS Bedrock Documentation](https://docs.aws.amazon.com/bedrock/)
+- [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/)
+- [boto3 Bedrock Runtime API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Provider Architecture ADR](./ADR-015-unified-provider-architecture.md)
@@ -16,8 +16,7 @@ The Nextcloud MCP Server includes comprehensive observability features for produ
 export METRICS_ENABLED=true
 export METRICS_PORT=9090

-# Enable tracing (optional)
-export OTEL_ENABLED=true
+# Enable tracing (optional - tracing is enabled when OTEL_EXPORTER_OTLP_ENDPOINT is set)
 export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317

 # Start the server
@@ -46,8 +45,7 @@ helm install nextcloud-mcp charts/nextcloud-mcp-server \
 |----------|---------|-------------|
 | `METRICS_ENABLED` | `true` | Enable Prometheus metrics |
 | `METRICS_PORT` | `9090` | Port for metrics endpoint |
-| `OTEL_ENABLED` | `false` | Enable OpenTelemetry tracing |
-| `OTEL_EXPORTER_OTLP_ENDPOINT` | - | OTLP gRPC endpoint (e.g., `http://otel-collector:4317`) |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | - | OTLP gRPC endpoint (e.g., `http://otel-collector:4317`). Tracing is enabled when this is set. |
 | `OTEL_SERVICE_NAME` | `nextcloud-mcp-server` | Service name in traces |
 | `OTEL_TRACES_SAMPLER` | `always_on` | Trace sampling strategy |
 | `OTEL_TRACES_SAMPLER_ARG` | `1.0` | Sampling rate (0.0-1.0) |
@@ -245,7 +243,7 @@ If you see cardinality warnings:
 The observability stack integrates at multiple layers:

 1. **HTTP Layer**: `ObservabilityMiddleware` tracks all HTTP requests
-2. **MCP Layer**: Tools use `@trace_mcp_tool` for span creation
+2. **MCP Layer**: Tools use `@instrument_tool` for automatic metrics and trace span creation
 3. **Client Layer**: `BaseNextcloudClient` tracks all API calls
 4. **OAuth Layer**: Token operations are traced and metered
 5. **Background Tasks**: Vector sync operations emit metrics/traces
@@ -0,0 +1,93 @@
+# Vector Sync UI Guide
+
+This guide covers the browser-based interface for the Nextcloud MCP Server's semantic search and vector synchronization features.
+
+## Overview
+
+The Vector Sync UI (`/app`) provides an interactive interface to test semantic search queries and visualize results from your Nextcloud documents. It exposes the same retrieval capabilities that LLMs use in Retrieval-Augmented Generation (RAG) workflows, powered by Alpine.js for reactive state, htmx for dynamic updates, and Plotly.js for 3D visualization.
+
+**Supported Apps**: Notes, Files (text/PDF), Calendar (events/tasks), Contacts (CardDAV), and Deck are indexed and searchable.
+
+## Accessing the UI
+
+Navigate to `/app` after authentication:
+- **BasicAuth mode**: `http://localhost:8000/app` (uses credentials from environment)
+- **OAuth mode**: `http://localhost:8000/app` (redirects to login if not authenticated)
+
+## Tabs
+
+### Welcome Page
+
+Landing page that introduces semantic search and RAG workflows. Shows authentication status, explains how vector embeddings work, and provides feature navigation. Adapts content based on whether `VECTOR_SYNC_ENABLED=true`.
+
+### User Info
+
+Displays authentication details and session information:
+- **BasicAuth**: Username, mode badge, Nextcloud host
+- **OAuth**: Username, session ID (truncated), background access status, IdP profile, revocation option
+
+### Vector Sync Status
+
+Real-time monitoring of document indexing:
+- **Indexed Documents**: Total chunks stored in Qdrant vector database (immediately searchable)
+- **Pending Documents**: Queue awaiting embedding processing
+- **Status**: "✓ Idle" (green) when up-to-date, "⟳ Syncing" (orange) during processing
+
+Auto-refreshes every 10 seconds via htmx. Check this tab after adding content to verify indexing completion.
+
+### Vector Visualization
+
+Interactive search interface with 3D PCA plot of semantic space.
+
+**Search Controls**:
+- **Query**: Natural language search (e.g., "health benefits of coffee")
+- **Algorithm**: Semantic (Dense) for pure vector search, or BM25 Hybrid (default) combining vectors + keywords
+- **Fusion** (Hybrid only): RRF (Reciprocal Rank Fusion) or DBSF (Distribution-Based Score Fusion)
+- **Advanced**: Filter by document type, adjust score threshold (0.0-1.0), set result limit (max 100)
+
+**3D Visualization**:
+
+The plot uses Principal Component Analysis (PCA) to reduce 768-dimensional embeddings to 3D. Documents are positioned by semantic similarity with the query point shown in red. Point size and opacity indicate relevance, and the Viridis color scale shows relative scores (yellow = highest match).
+
+**Why normalization matters**: Vectors are L2-normalized before PCA to match Qdrant's cosine distance, ensuring query points position accurately near similar documents. Without normalization, magnitude differences cause misleading spatial separation.
+
+**Results List**:
+
+Each result shows document title (clickable link to Nextcloud), excerpt, raw score, relative percentage, and document type. Click "Show Chunk" to view the matched text segment with surrounding context (up to 500 characters before/after).
+
+## Configuration
+
+**Required**:
+```bash
+VECTOR_SYNC_ENABLED=true
+```
+
+**Optional** (for browser-accessible links):
+```bash
+NEXTCLOUD_PUBLIC_ISSUER_URL=https://your-public-nextcloud-url.com
+```
+
+**Admin Access**: The Webhooks tab is only visible to Nextcloud admins (verified via the Provisioning API).
+
+## Use Cases
+
+**Testing Search Queries**: Preview results before they reach LLMs in RAG workflows. Compare semantic vs. hybrid algorithms, verify relevance scores, and validate that correct documents are retrieved. Use chunk context to see exactly which text segments match and why unexpected documents appear.
+
+**Monitoring Indexing**: Track real-time progress after creating or modifying documents. Check if the queue is backing up (high pending count) or confirm the system is idle after bulk imports. Verify documents become searchable immediately after indexing completes.
+
+**Algorithm Comparison**: Pure semantic search excels at conceptual queries and synonyms. BM25 hybrid combines semantic understanding with precise keyword matching for better accuracy on specific terms. Experiment with RRF vs. DBSF fusion for different score distributions.
+
+## Troubleshooting
+
+**Vector Sync Tab Not Visible**: Set `VECTOR_SYNC_ENABLED=true` and restart the server.
+
+**No Search Results**: Check Vector Sync Status to confirm documents are indexed (not just pending). Try broader queries or lower the score threshold in Advanced options. Initial indexing may take time depending on document volume.
+
+**Links to Nextcloud Apps Not Working**: Set `NEXTCLOUD_PUBLIC_ISSUER_URL` to your browser-accessible Nextcloud URL for correct link generation.
+
+## Related Documentation
+
+- [Configuration Guide](../configuration.md) - Environment variables and settings
+- [Authentication Modes](../authentication.md) - BasicAuth vs OAuth setup
+- [Installation Guide](../installation.md) - Getting started
+- [ADR-008: MCP Sampling for RAG](../ADR-008-mcp-sampling-for-rag.md) - Technical details on RAG workflows
@@ -1,7 +1,7 @@
 """Browser-based OAuth login routes for admin UI.

 Separate from MCP OAuth flow - these routes establish browser sessions
-for accessing admin UI endpoints like /user/page.
+for accessing admin UI endpoints like /app.
 """

 import hashlib
@@ -38,8 +38,8 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
    """
    oauth_ctx = request.app.state.oauth_context
    if not oauth_ctx:
-        # BasicAuth mode - no login needed, redirect to user page
-        return RedirectResponse("/user/page", status_code=302)
+        # BasicAuth mode - no login needed, redirect to app
+        return RedirectResponse("/app", status_code=302)

    storage = oauth_ctx["storage"]
    oauth_client = oauth_ctx["oauth_client"]
@@ -71,7 +71,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
    await storage.store_oauth_session(
        session_id=state,  # Use state as session ID
        client_id="browser-ui",
-        client_redirect_uri="/user/page",
+        client_redirect_uri="/app",
        state=state,
        code_challenge=code_challenge,
        code_challenge_method="S256",
@@ -383,7 +383,7 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
            # Continue anyway - profile cache is optional for browser UI

    # Create response and set session cookie
-    response = RedirectResponse("/user/page", status_code=302)
+    response = RedirectResponse("/app", status_code=302)
    response.set_cookie(
        key="mcp_session",
        value=user_id,
@@ -8,7 +8,7 @@ from typing import Any
 import anyio
 import httpx

-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -12,6 +12,10 @@ from mcp.server.fastmcp import Context

 from ..client import NextcloudClient
 from ..config import get_settings
+from ..observability.metrics import (
+    oauth_token_cache_hits_total,
+    oauth_token_exchange_total,
+)
 from .token_exchange import exchange_token_for_audience

 logger = logging.getLogger(__name__)
@@ -138,6 +142,7 @@ async def get_session_client_from_context(
                logger.debug(
                    f"Using cached exchanged token (expires in {expiry - time.time():.1f}s)"
                )
+                oauth_token_cache_hits_total.labels(hit="true").inc()
                return NextcloudClient.from_token(
                    base_url=base_url, token=cached_token, username=username
                )
@@ -145,17 +150,24 @@ async def get_session_client_from_context(
                logger.debug("Cached token expired, removing from cache")
                del _exchange_cache[cache_key]

+        oauth_token_cache_hits_total.labels(hit="false").inc()
+
        # Perform RFC 8693 token exchange
        logger.info(f"Exchanging MCP token for Nextcloud API token (user: {username})")

-        # Exchange for Nextcloud resource URI audience
-        exchanged_token, expires_in = await exchange_token_for_audience(
-            subject_token=mcp_token,
-            requested_audience=settings.nextcloud_resource_uri or "nextcloud",
-            requested_scopes=None,  # Nextcloud doesn't support scopes
-        )
+        try:
+            # Exchange for Nextcloud resource URI audience
+            exchanged_token, expires_in = await exchange_token_for_audience(
+                subject_token=mcp_token,
+                requested_audience=settings.nextcloud_resource_uri or "nextcloud",
+                requested_scopes=None,  # Nextcloud doesn't support scopes
+            )
+            oauth_token_exchange_total.labels(status="success").inc()

-        logger.info(f"Token exchange successful. Token expires in {expires_in}s")
+            logger.info(f"Token exchange successful. Token expires in {expires_in}s")
+        except Exception:
+            oauth_token_exchange_total.labels(status="error").inc()
+            raise

        # Cache the exchanged token
        # Use the minimum of exchange TTL and configured cache TTL
@@ -32,7 +32,7 @@ from starlette.requests import Request
 from starlette.responses import JSONResponse, RedirectResponse

 from nextcloud_mcp_server.auth.client_registry import get_client_registry
-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -0,0 +1,54 @@
+"""Permission checking utilities for Nextcloud admin operations."""
+
+import logging
+
+from httpx import AsyncClient
+from starlette.requests import Request
+
+from nextcloud_mcp_server.client.users import UsersClient
+
+logger = logging.getLogger(__name__)
+
+
+async def is_nextcloud_admin(request: Request, http_client: AsyncClient) -> bool:
+    """Check if the authenticated user is a Nextcloud administrator.
+
+    This function extracts the username from the session/request context
+    and checks if the user is a member of the "admin" group in Nextcloud.
+
+    Args:
+        request: Starlette request object with authenticated user
+        http_client: Authenticated HTTP client for Nextcloud API calls
+
+    Returns:
+        True if user is admin, False otherwise
+
+    Example:
+        ```python
+        if await is_nextcloud_admin(request, http_client):
+            # Show admin-only features
+            pass
+        ```
+    """
+    try:
+        # Extract username from authenticated session
+        username = request.user.display_name
+        if not username:
+            logger.warning("No username found in authenticated session")
+            return False
+
+        # Query Nextcloud for user's group memberships
+        users_client = UsersClient(http_client, username)
+        user_groups = await users_client.get_user_groups(username)
+
+        # Check if user is in the admin group
+        is_admin = "admin" in user_groups
+        logger.debug(
+            f"Admin check for user '{username}': {is_admin} (groups: {user_groups})"
+        )
+
+        return is_admin
+
+    except Exception as e:
+        logger.error(f"Error checking admin permissions: {e}", exc_info=True)
+        return False
@@ -13,7 +13,7 @@ from mcp.server.fastmcp import Context
 from mcp.shared.exceptions import McpError
 from mcp.types import ErrorData

-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -0,0 +1,219 @@
+/* Page root: a vertical flex column of cards that scrolls when content overflows. */
+.viz-layout {
+    display: flex;
+    flex-direction: column;
+    gap: 16px;
+    height: 100%;
+    min-height: 0;
+    overflow-y: auto;
+}
+/* Generic card: flat look (no rounded corners, no shadow) on the theme background. */
+.viz-card {
+    background: var(--color-main-background);
+    border-radius: 0;
+    padding: 16px;
+    box-shadow: none;
+}
+/* Controls card keeps its natural height and is separated by a bottom border. */
+.viz-controls-card {
+    flex: 0 0 auto;
+    border-bottom: 1px solid var(--color-border);
+    padding-bottom: 16px;
+}
+/* Controls grid: auto-fitting columns (min 200px) by default... */
+.viz-controls-grid {
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+    gap: 12px;
+    align-items: end;
+}
+/* ...and a fixed five-column track layout from 768px viewport width upwards. */
+@media (min-width: 768px) {
+    .viz-controls-grid {
+        grid-template-columns: 2fr 1.5fr 1.5fr auto auto;
+    }
+}
+/* A single labelled control: label stacked above its input. */
+.viz-control-group {
+    display: flex;
+    flex-direction: column;
+    gap: 4px;
+}
+.viz-control-group label {
+    font-weight: 500;
+    color: var(--color-main-text);
+    font-size: 13px;
+}
+/* Text, number and select inputs share one themed appearance. */
+.viz-control-group input[type="text"],
+.viz-control-group input[type="number"],
+.viz-control-group select {
+    width: 100%;
+    padding: 7px 10px;
+    border: 1px solid var(--color-border-dark);
+    border-radius: var(--border-radius);
+    font-size: 14px;
+    background: var(--color-main-background);
+    color: var(--color-main-text);
+}
+/* Focus is indicated by border colour only; the default outline is removed. */
+.viz-control-group input:focus,
+.viz-control-group select:focus {
+    outline: none;
+    border-color: var(--color-primary-element);
+}
+.viz-control-group input[type="range"] {
+    width: 100%;
+}
+/* Multi-select lists get a minimum height of 100px. */
+.viz-control-group select[multiple] {
+    min-height: 100px;
+}
+/* Inline value readout, right-aligned with a reserved minimum width. */
+/* NOTE(review): #666 is hard-coded rather than a theme variable. */
+.viz-weight-display {
+    display: inline-block;
+    min-width: 40px;
+    text-align: right;
+    color: #666;
+}
+/* Primary action button, coloured with the theme's primary element colour. */
+.viz-btn {
+    background: var(--color-primary-element);
+    color: white;
+    border: none;
+    padding: 7px 16px;
+    border-radius: var(--border-radius);
+    cursor: pointer;
+    font-size: 14px;
+    font-weight: 500;
+    white-space: nowrap;
+}
+/* NOTE(review): hover colour is a hard-coded blue, not derived from the theme variable above. */
+.viz-btn:hover {
+    background: #0052a3;
+}
+/* Secondary button: same geometry as .viz-btn, fixed grey palette, normal font weight. */
+.viz-btn-secondary {
+    background: #6c757d;
+    color: white;
+    border: none;
+    padding: 7px 16px;
+    border-radius: var(--border-radius);
+    cursor: pointer;
+    font-size: 14px;
+    white-space: nowrap;
+}
+.viz-btn-secondary:hover {
+    background: #5a6268;
+}
+/* Card hosting the plot: fixed 600px height with a 500px minimum, never flexed. */
+.viz-card-plot {
+    flex: 0 0 auto;
+    display: flex;
+    flex-direction: column;
+    min-height: 500px;
+    height: 600px;
+    /* Remove horizontal padding to extend to full viewport width */
+    padding-left: 0;
+    padding-right: 0;
+    /* Negative margins presumably cancel a 16px padding on the parent - TODO confirm in the template. */
+    margin-left: -16px;
+    margin-right: -16px;
+}
+/* Positioned wrapper so that absolutely positioned children (e.g. the loading overlay) are laid out against it. */
+#viz-plot-container {
+    width: 100%;
+    height: 100%;
+    position: relative;
+    overflow: visible;
+}
+/* Plot target element: fills its container. */
+#viz-plot {
+    width: 100%;
+    height: 100%;
+}
+/* Centered, padded loading message. */
+.viz-loading {
+    text-align: center;
+    padding: 40px;
+    color: #666;
+}
+/* Overlay that covers its positioned ancestor and centres its content. */
+.viz-loading-overlay {
+    position: absolute;
+    inset: 0;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    /* Follow the theme background like the other surfaces in this stylesheet;
+       fall back to white when the variable is not defined. */
+    background: var(--color-main-background, white);
+    color: #666;
+}
+/* Empty-state message: centered, padded, italic. */
+.viz-no-results {
+    text-align: center;
+    padding: 40px;
+    color: #666;
+    font-style: italic;
+}
+/* Bordered panel on the hover background colour, used for the advanced options area. */
+.viz-advanced-section {
+    margin-top: 12px;
+    padding: 12px;
+    background: var(--color-background-hover);
+    border-radius: var(--border-radius);
+    border: 1px solid var(--color-border);
+}
+/* Informational callout with a primary-coloured accent bar on the left. */
+.viz-info-box {
+    background: var(--color-primary-element-light);
+    border-left: 3px solid var(--color-primary-element);
+    padding: 10px 12px;
+    margin-bottom: 16px;
+    font-size: 13px;
+    color: var(--color-main-text);
+}
+/* Small grey button for toggling a result's chunk view (same palette as .viz-btn-secondary). */
+.chunk-toggle-btn {
+    background: #6c757d;
+    color: white;
+    border: none;
+    padding: 4px 10px;
+    border-radius: 3px;
+    cursor: pointer;
+    font-size: 12px;
+    margin-top: 6px;
+}
+.chunk-toggle-btn:hover {
+    background: #5a6268;
+}
+/* Monospace box for chunk text; whitespace is preserved and long words wrap. */
+.chunk-context {
+    background: var(--color-background-hover);
+    border: 1px solid var(--color-border);
+    border-radius: var(--border-radius);
+    padding: 12px;
+    margin-top: 8px;
+    font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace;
+    font-size: 13px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+/* Text shown in the max-contrast text colour. */
+.chunk-text {
+    color: var(--color-text-maxcontrast);
+}
+/* Highlighted segment: yellow background with an amber border. */
+.chunk-matched {
+    background: #fff3cd;
+    border: 1px solid #ffc107;
+    padding: 2px 4px;
+    border-radius: var(--border-radius);
+    font-weight: 500;
+    color: var(--color-main-text);
+}
+/* Italic marker text, same colour as .chunk-text. */
+.chunk-ellipsis {
+    color: var(--color-text-maxcontrast);
+    font-style: italic;
+}
+
+/* PDF highlighted image styles */
+/* Bordered frame around the image; overflow is clipped to the rounded corners. */
+/* NOTE(review): the #fff background is hard-coded and does not follow the theme. */
+.chunk-image-container {
+    margin-bottom: 16px;
+    border: 1px solid var(--color-border);
+    border-radius: var(--border-radius);
+    overflow: hidden;
+    background: #fff;
+}
+/* Caption strip above the image. */
+.chunk-image-header {
+    background: var(--color-background-dark);
+    padding: 8px 12px;
+    font-size: 12px;
+    font-weight: 500;
+    color: var(--color-text-maxcontrast);
+    border-bottom: 1px solid var(--color-border);
+    font-family: var(--font-face);
+}
+/* Image scales down to the container width; the zoom-in cursor hints at a click action. */
+.chunk-highlighted-image {
+    display: block;
+    max-width: 100%;
+    height: auto;
+    cursor: zoom-in;
+}
+/* Slight fade as hover feedback. */
+.chunk-highlighted-image:hover {
+    opacity: 0.95;
+}
@@ -0,0 +1,253 @@
+// Initialize vizApp for vector visualization
+function vizApp() {
+    return {
+        query: '',
+        algorithm: 'bm25_hybrid',
+        fusion: 'rrf',
+        showAdvanced: false,
+        showQueryPoint: true,
+        docTypes: [''],
+        limit: 50,
+        scoreThreshold: 0.0,
+        loading: false,
+        results: [],
+        coordinates: null,
+        queryCoords: null,
+        expandedChunks: {},
+        chunkLoading: {},
+
+        /**
+         * Register a window resize handler that resizes the existing plot.
+         * Presumably invoked by the UI framework when the component is
+         * initialized - confirm against the template that uses vizApp().
+         */
+        init() {
+            // Set up window resize listener to resize plot
+            window.addEventListener('resize', () => {
+                // Only resize once a search has produced coordinates and results
+                if (this.coordinates && this.results.length > 0) {
+                    Plotly.Plots.resize('viz-plot');
+                }
+            });
+        },
+
+        /**
+         * Run a search against `/app/vector-viz/search` using the current
+         * form state, then store the results and render the plot.
+         *
+         * The response is read as JSON and is expected to carry `success`,
+         * and on success `results`, `coordinates_3d` and `query_coords`;
+         * on failure `error`. Failures are reported with `alert()`.
+         * `loading` is true for the duration of the request.
+         */
+        async executeSearch() {
+            this.loading = true;
+            // Clear the previous result list; coordinates are kept until replaced
+            this.results = [];
+
+            try {
+                const params = new URLSearchParams({
+                    query: this.query,
+                    algorithm: this.algorithm,
+                    limit: this.limit,
+                    score_threshold: this.scoreThreshold,
+                });
+
+                // The fusion method is only sent for the hybrid algorithm
+                if (this.algorithm === 'bm25_hybrid') {
+                    params.append('fusion', this.fusion);
+                }
+
+                // An empty-string entry in docTypes means "no filter" and is dropped
+                const selectedTypes = this.docTypes.filter(t => t !== '');
+                if (selectedTypes.length > 0) {
+                    params.append('doc_types', selectedTypes.join(','));
+                }
+
+                // NOTE(review): response.ok is not checked; a non-JSON body makes
+                // response.json() throw and is reported by the catch block below.
+                const response = await fetch(`/app/vector-viz/search?${params}`);
+                const data = await response.json();
+
+                if (data.success) {
+                    this.results = data.results;
+                    this.coordinates = data.coordinates_3d;
+                    this.queryCoords = data.query_coords;
+                    this.renderPlot(this.coordinates, this.queryCoords, this.results);
+                } else {
+                    alert('Search failed: ' + data.error);
+                }
+            } catch (error) {
+                alert('Error: ' + error.message);
+            } finally {
+                // Always clear the loading flag, on success and on failure
+                this.loading = false;
+            }
+        },
+
+        /**
+         * Apply the current `showQueryPoint` state to the plot.
+         * Does nothing unless coordinates, query coordinates and at least one
+         * result are present.
+         */
+        updatePlot() {
+            // Toggle query point visibility without recreating the plot
+            // This preserves camera position naturally since layout is untouched
+            if (this.coordinates && this.queryCoords && this.results.length > 0) {
+                const plotDiv = document.getElementById('viz-plot');
+
+                // If plot exists, just toggle the query trace visibility
+                if (plotDiv && plotDiv.data && plotDiv.data.length >= 2) {
+                    // Trace index 1 (0-based, second trace passed to newPlot) is the query point
+                    Plotly.restyle('viz-plot', { visible: this.showQueryPoint }, [1]);
+                } else {
+                    // Plot doesn't exist yet, render it
+                    this.renderPlot(this.coordinates, this.queryCoords, this.results);
+                }
+            }
+        },
+
+        /**
+         * Draw the 3D scatter plot into `#viz-plot` with two traces:
+         * the documents (index 0) and the query point (index 1).
+         *
+         * @param coordinates Array of [x, y, z] triples, indexed in parallel
+         *                    with `results`.
+         * @param queryCoords [x, y, z] triple for the query point.
+         * @param results     Result objects; `title`, `original_score` and
+         *                    `score` are read here.
+         */
+        renderPlot(coordinates, queryCoords, results) {
+            // Get container dimensions before creating layout
+            const container = document.getElementById('viz-plot-container');
+            const width = container.clientWidth;
+            const height = container.clientHeight;
+
+            const scores = results.map(r => r.score);
+
+            // Trace index 0: Document results (always visible)
+            const documentTrace = {
+                x: coordinates.map(c => c[0]),
+                y: coordinates.map(c => c[1]),
+                z: coordinates.map(c => c[2]),
+                mode: 'markers',
+                type: 'scatter3d',
+                name: 'Documents',
+                visible: true,
+                // Per-point payload referenced by the hover template below
+                customdata: results.map((r, i) => ({
+                    title: r.title,
+                    raw_score: r.original_score,
+                    relative_score: r.score,
+                    x: coordinates[i][0],
+                    y: coordinates[i][1],
+                    z: coordinates[i][2]
+                })),
+                hovertemplate:
+                    '<b>%{customdata.title}</b><br>' +
+                    'Raw Score: %{customdata.raw_score:.3f} (%{customdata.relative_score:.0%} relative)<br>' +
+                    '(x=%{customdata.x}, y=%{customdata.y}, z=%{customdata.z})' +
+                    '<extra></extra>',
+                marker: {
+                    // Size grows with the square of the score, opacity linearly.
+                    // Presumably score is in [0, 1] (matches cmin/cmax below) - confirm with the API.
+                    size: results.map(r => 4 + (Math.pow(r.score, 2) * 10)),
+                    opacity: results.map(r => 0.3 + (r.score * 0.7)),
+                    color: scores,
+                    colorscale: 'Viridis',
+                    showscale: true,
+                    colorbar: {
+                        title: 'Relative Score',
+                        x: 1.02,
+                        xanchor: 'left',
+                        thickness: 20,
+                        len: 0.8
+                    },
+                    cmin: 0,
+                    cmax: 1
+                }
+            };
+
+            // Trace index 1: Query point (visibility controlled by toggle)
+            const queryTrace = {
+                x: [queryCoords[0]],
+                y: [queryCoords[1]],
+                z: [queryCoords[2]],
+                mode: 'markers',
+                type: 'scatter3d',
+                name: 'Query',
+                visible: this.showQueryPoint,  // Initial visibility from state
+                hovertemplate:
+                    '<b>Search Query</b><br>' +
+                    `(x=${queryCoords[0]}, y=${queryCoords[1]}, z=${queryCoords[2]})` +
+                    '<extra></extra>',
+                marker: {
+                    size: 10,
+                    color: '#ef5350',  // Subdued red (Material Design Red 400)
+                    line: {
+                        color: '#c62828',  // Darker red border (Material Design Red 800)
+                        width: 1
+                    }
+                }
+            };
+
+            const layout = {
+                title: `Vector Space (PCA 3D) - ${results.length} results`,
+                width: width,   // Explicit width from container
+                height: height, // Explicit height from container
+                scene: {
+                    xaxis: { title: 'PC1' },
+                    yaxis: { title: 'PC2' },
+                    zaxis: { title: 'PC3' },
+                    camera: {
+                        eye: { x: 1.5, y: 1.5, z: 1.5 }
+                    },
+                    // Full width for 3D scene
+                    domain: {
+                        x: [0, 1],
+                        y: [0, 1]
+                    }
+                },
+                hovermode: 'closest',
+                autosize: true,  // Enable auto-sizing for window resizes
+                showlegend: false,  // Hide legend
+                margin: { l: 0, r: 100, t: 40, b: 0 }  // Right margin for colorbar
+            };
+
+            // Always render both traces - visibility is controlled by the visible property
+            // Order matters: updatePlot() addresses the query trace by index 1
+            const traces = [documentTrace, queryTrace];
+
+            // Enable responsive resizing
+            const config = {
+                responsive: true,
+                displayModeBar: true
+            };
+
+            // Use newPlot() with explicit dimensions - renders at correct size immediately
+            // Camera position will be preserved by subsequent Plotly.restyle() calls in updatePlot()
+            Plotly.newPlot('viz-plot', traces, layout, config);
+        },
+
+        /**
+         * Build a link into the Nextcloud web UI for a search result.
+         * Notes and files link to the specific item via `result.id`; calendar,
+         * contact and deck results link to the app's landing page; any other
+         * `doc_type` links to the base URL itself.
+         */
+        getNextcloudUrl(result) {
+            // Use global NEXTCLOUD_BASE_URL if set; otherwise fall back to an empty
+            // prefix, which yields paths relative to the current origin
+            const baseUrl = window.NEXTCLOUD_BASE_URL || '';
+            switch (result.doc_type) {
+                case 'note':
+                    return `${baseUrl}/apps/notes/note/${result.id}`;
+                case 'file':
+                    return `${baseUrl}/apps/files/?fileId=${result.id}`;
+                case 'calendar':
+                    return `${baseUrl}/apps/calendar`;
+                case 'contact':
+                    return `${baseUrl}/apps/contacts`;
+                case 'deck':
+                    return `${baseUrl}/apps/deck`;
+                default:
+                    return `${baseUrl}`;
+            }
+        },
+
+        /**
+         * True when the result carries both chunk offsets.
+         * Loose `!= null` rejects null and undefined but accepts an offset of 0.
+         */
+        hasChunkPosition(result) {
+            return result.chunk_start_offset != null && result.chunk_end_offset != null;
+        },
+
+        /**
+         * True when chunk data is currently stored for `resultKey`
+         * (the key format is built in toggleChunk()).
+         */
+        isChunkExpanded(resultKey) {
+            return this.expandedChunks[resultKey] !== undefined;
+        },
+
+        /**
+         * Expand or collapse the chunk view for a result.
+         *
+         * If the chunk is already expanded its stored data is removed.
+         * Otherwise the chunk is requested from `/app/chunk-context` with
+         * 500 as the `context` parameter and the JSON response is stored under
+         * the result's key. Failures are reported with `alert()`.
+         */
+        async toggleChunk(result) {
+            // Key identifies a chunk by document type, id and start offset (0 when the offset is missing/falsy)
+            const resultKey = `${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`;
+
+            // Second click on an expanded chunk collapses it
+            if (this.isChunkExpanded(resultKey)) {
+                delete this.expandedChunks[resultKey];
+                return;
+            }
+
+            this.chunkLoading[resultKey] = true;
+
+            try {
+                // NOTE(review): offsets are sent as-is; callers presumably gate this
+                // on hasChunkPosition() - confirm in the template.
+                const params = new URLSearchParams({
+                    doc_type: result.doc_type,
+                    doc_id: result.id,
+                    start: result.chunk_start_offset,
+                    end: result.chunk_end_offset,
+                    context: 500
+                });
+
+                const response = await fetch(`/app/chunk-context?${params}`);
+                const data = await response.json();
+
+                if (data.success) {
+                    // The whole response object is stored, not just the text
+                    this.expandedChunks[resultKey] = data;
+                } else {
+                    alert('Failed to load chunk: ' + data.error);
+                }
+            } catch (error) {
+                alert('Error loading chunk: ' + error.message);
+            } finally {
+                // Clear the per-chunk loading marker on success and on failure
+                delete this.chunkLoading[resultKey];
+            }
+        }
+    };
+}
@@ -1,23 +1,28 @@
 """
-Refresh Token Storage for ADR-002 Tier 1: Offline Access
+Persistent Storage for MCP Server State

-Manages two separate concerns for OAuth authentication:
+This module provides SQLite-based storage for multiple concerns across both
+BasicAuth and OAuth authentication modes:

-1. **Refresh Tokens** (for background jobs ONLY)
+1. **Refresh Tokens** (OAuth mode only, for background jobs)
   - Securely stores encrypted refresh tokens for offline access
   - Used ONLY by background jobs to obtain access tokens
   - NEVER used within MCP client sessions or browser sessions

-2. **User Profile Cache** (for browser UI display ONLY)
+2. **User Profile Cache** (OAuth mode only, for browser UI display)
   - Caches IdP user profile data for browser-based admin UI
   - Queried ONCE at login, displayed from cache thereafter
   - NOT used for authorization decisions or background jobs

-IMPORTANT: These are separate concerns. Browser sessions read profile cache for
-display purposes. Background jobs use refresh tokens for API access. Never mix
-the two.
+3. **Webhook Registration Tracking** (both modes, for webhook management)
+   - Tracks registered webhook IDs mapped to presets
+   - Enables persistent webhook state across restarts
+   - Avoids redundant Nextcloud API calls for webhook status

-Tokens are encrypted at rest using Fernet symmetric encryption.
+IMPORTANT: The database is initialized in both BasicAuth and OAuth modes.
+Token storage requires TOKEN_ENCRYPTION_KEY, but webhook tracking does not.
+
+Sensitive data (tokens, secrets) is encrypted at rest using Fernet symmetric encryption.
 """

 import json
@@ -30,29 +35,40 @@ from typing import Any, Optional
 import aiosqlite
 from cryptography.fernet import Fernet

+from nextcloud_mcp_server.observability.metrics import record_db_operation
+
 logger = logging.getLogger(__name__)


 class RefreshTokenStorage:
-    """Securely store and manage user refresh tokens and profile cache.
+    """Persistent storage for MCP server state (tokens, webhooks, and future features).

-    This class manages two separate concerns:
-    - Refresh tokens: Encrypted storage for background job access (write-only by OAuth, read-only by background jobs)
-    - User profiles: Plain JSON cache for browser UI display (written at login, read by UI)
+    This class manages multiple concerns across both BasicAuth and OAuth modes:

-    These concerns are architecturally separate and should never be mixed.
+    **OAuth-specific concerns**:
+    - Refresh tokens: Encrypted storage for background job access (requires encryption key)
+    - User profiles: Plain JSON cache for browser UI display
+    - OAuth client credentials: Encrypted client secrets from DCR
+    - OAuth sessions: Temporary session state for progressive consent flow
+
+    **Both modes**:
+    - Webhook registration: Track registered webhooks mapped to presets
+    - Schema versioning: Handle database migrations automatically
+
+    Token-related operations require TOKEN_ENCRYPTION_KEY, but webhook operations do not.
    """

-    def __init__(self, db_path: str, encryption_key: bytes):
+    def __init__(self, db_path: str, encryption_key: bytes | None = None):
        """
-        Initialize refresh token storage.
+        Initialize persistent storage.

        Args:
            db_path: Path to SQLite database file
-            encryption_key: Fernet encryption key (32 bytes, base64-encoded)
+            encryption_key: Optional Fernet encryption key (32 bytes, base64-encoded).
+                          Required for token storage operations, not required for webhook tracking.
        """
        self.db_path = db_path
-        self.cipher = Fernet(encryption_key)
+        self.cipher = Fernet(encryption_key) if encryption_key else None
        self._initialized = False

    @classmethod
@@ -62,41 +78,42 @@ class RefreshTokenStorage:

        Environment variables:
            TOKEN_STORAGE_DB: Path to database file (default: /app/data/tokens.db)
-            TOKEN_ENCRYPTION_KEY: Base64-encoded Fernet key
+            TOKEN_ENCRYPTION_KEY: Optional base64-encoded Fernet key (required for token storage)

        Returns:
            RefreshTokenStorage instance

-        Raises:
-            ValueError: If TOKEN_ENCRYPTION_KEY is not set
+        Note:
+            If TOKEN_ENCRYPTION_KEY is not set, token storage operations will fail,
+            but webhook tracking will still work.
        """
        db_path = os.getenv("TOKEN_STORAGE_DB", "/app/data/tokens.db")
        encryption_key_b64 = os.getenv("TOKEN_ENCRYPTION_KEY")

-        if not encryption_key_b64:
-            raise ValueError(
-                "TOKEN_ENCRYPTION_KEY environment variable is required. "
-                "Generate one with: python -c 'from cryptography.fernet import Fernet; "
-                "print(Fernet.generate_key().decode())'"
+        encryption_key = None
+        if encryption_key_b64:
+            # Fernet expects a base64url-encoded key as bytes, not decoded bytes
+            # The key from Fernet.generate_key() is already base64url-encoded
+            try:
+                # Convert string to bytes if needed
+                if isinstance(encryption_key_b64, str):
+                    encryption_key = encryption_key_b64.encode()
+                else:
+                    encryption_key = encryption_key_b64
+
+                # Validate the key by trying to create a Fernet instance
+                Fernet(encryption_key)
+            except Exception as e:
+                raise ValueError(
+                    f"Invalid TOKEN_ENCRYPTION_KEY: {e}. "
+                    "Must be a valid Fernet key (base64url-encoded 32 bytes)."
+                ) from e
+        else:
+            logger.info(
+                "TOKEN_ENCRYPTION_KEY not set - token storage operations will be unavailable, "
+                "but webhook tracking will still work"
            )

-        # Fernet expects a base64url-encoded key as bytes, not decoded bytes
-        # The key from Fernet.generate_key() is already base64url-encoded
-        try:
-            # Convert string to bytes if needed
-            if isinstance(encryption_key_b64, str):
-                encryption_key = encryption_key_b64.encode()
-            else:
-                encryption_key = encryption_key_b64
-
-            # Validate the key by trying to create a Fernet instance
-            Fernet(encryption_key)
-        except Exception as e:
-            raise ValueError(
-                f"Invalid TOKEN_ENCRYPTION_KEY: {e}. "
-                "Must be a valid Fernet key (base64url-encoded 32 bytes)."
-            ) from e
-
        return cls(db_path=db_path, encryption_key=encryption_key)

    async def initialize(self) -> None:
@@ -204,6 +221,38 @@ class RefreshTokenStorage:
                "ON oauth_sessions(mcp_authorization_code)"
            )

+            # Schema version tracking
+            await db.execute(
+                """
+                CREATE TABLE IF NOT EXISTS schema_version (
+                    version INTEGER PRIMARY KEY,
+                    applied_at REAL NOT NULL
+                )
+                """
+            )
+
+            # Registered webhooks tracking (both BasicAuth and OAuth modes)
+            await db.execute(
+                """
+                CREATE TABLE IF NOT EXISTS registered_webhooks (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    webhook_id INTEGER NOT NULL UNIQUE,
+                    preset_id TEXT NOT NULL,
+                    created_at REAL NOT NULL
+                )
+                """
+            )
+
+            # Create indexes for efficient webhook queries
+            await db.execute(
+                "CREATE INDEX IF NOT EXISTS idx_webhooks_preset "
+                "ON registered_webhooks(preset_id)"
+            )
+            await db.execute(
+                "CREATE INDEX IF NOT EXISTS idx_webhooks_created "
+                "ON registered_webhooks(created_at)"
+            )
+
            await db.commit()

        # Set restrictive permissions after creation
@@ -245,35 +294,43 @@ class RefreshTokenStorage:
        # For Flow 2, set provisioned_at timestamp
        provisioned_at = now if flow_type == "flow2" else None

-        async with aiosqlite.connect(self.db_path) as db:
-            await db.execute(
-                """
-                INSERT OR REPLACE INTO refresh_tokens
-                (user_id, encrypted_token, expires_at, created_at, updated_at,
-                 flow_type, token_audience, provisioned_at, provisioning_client_id, scopes)
-                VALUES (?, ?, ?, COALESCE((SELECT created_at FROM refresh_tokens WHERE user_id = ?), ?), ?,
-                        ?, ?, ?, ?, ?)
-                """,
-                (
-                    user_id,
-                    encrypted_token,
-                    expires_at,
-                    user_id,
-                    now,
-                    now,
-                    flow_type,
-                    token_audience,
-                    provisioned_at,
-                    provisioning_client_id,
-                    scopes_json,
-                ),
-            )
-            await db.commit()
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                await db.execute(
+                    """
+                    INSERT OR REPLACE INTO refresh_tokens
+                    (user_id, encrypted_token, expires_at, created_at, updated_at,
+                     flow_type, token_audience, provisioned_at, provisioning_client_id, scopes)
+                    VALUES (?, ?, ?, COALESCE((SELECT created_at FROM refresh_tokens WHERE user_id = ?), ?), ?,
+                            ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        user_id,
+                        encrypted_token,
+                        expires_at,
+                        user_id,
+                        now,
+                        now,
+                        flow_type,
+                        token_audience,
+                        provisioned_at,
+                        provisioning_client_id,
+                        scopes_json,
+                    ),
+                )
+                await db.commit()
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "insert", duration, "success")

-        logger.info(
-            f"Stored refresh token for user {user_id}"
-            + (f" (expires at {expires_at})" if expires_at else "")
-        )
+            logger.info(
+                f"Stored refresh token for user {user_id}"
+                + (f" (expires at {expires_at})" if expires_at else "")
+            )
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "insert", duration, "error")
+            raise

        # Audit log
        await self._audit_log(
@@ -375,40 +432,45 @@ class RefreshTokenStorage:
        if not self._initialized:
            await self.initialize()

-        async with aiosqlite.connect(self.db_path) as db:
-            async with db.execute(
-                """
-                SELECT encrypted_token, expires_at, flow_type, token_audience,
-                       provisioned_at, provisioning_client_id, scopes
-                FROM refresh_tokens WHERE user_id = ?
-                """,
-                (user_id,),
-            ) as cursor:
-                row = await cursor.fetchone()
-
-        if not row:
-            logger.debug(f"No refresh token found for user {user_id}")
-            return None
-
-        (
-            encrypted_token,
-            expires_at,
-            flow_type,
-            token_audience,
-            provisioned_at,
-            provisioning_client_id,
-            scopes_json,
-        ) = row
-
-        # Check expiration
-        if expires_at is not None and expires_at < time.time():
-            logger.warning(
-                f"Refresh token for user {user_id} has expired (expired at {expires_at})"
-            )
-            await self.delete_refresh_token(user_id)
-            return None
-
+        start_time = time.time()
        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                async with db.execute(
+                    """
+                    SELECT encrypted_token, expires_at, flow_type, token_audience,
+                           provisioned_at, provisioning_client_id, scopes
+                    FROM refresh_tokens WHERE user_id = ?
+                    """,
+                    (user_id,),
+                ) as cursor:
+                    row = await cursor.fetchone()
+
+            if not row:
+                logger.debug(f"No refresh token found for user {user_id}")
+                duration = time.time() - start_time
+                record_db_operation("sqlite", "select", duration, "success")
+                return None
+
+            (
+                encrypted_token,
+                expires_at,
+                flow_type,
+                token_audience,
+                provisioned_at,
+                provisioning_client_id,
+                scopes_json,
+            ) = row
+
+            # Check expiration
+            if expires_at is not None and expires_at < time.time():
+                logger.warning(
+                    f"Refresh token for user {user_id} has expired (expired at {expires_at})"
+                )
+                await self.delete_refresh_token(user_id)
+                duration = time.time() - start_time
+                record_db_operation("sqlite", "select", duration, "success")
+                return None
+
            decrypted_token = self.cipher.decrypt(encrypted_token).decode()
            scopes = json.loads(scopes_json) if scopes_json else None

@@ -416,6 +478,9 @@ class RefreshTokenStorage:
                f"Retrieved refresh token for user {user_id} (flow_type: {flow_type})"
            )

+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "success")
+
            return {
                "refresh_token": decrypted_token,
                "expires_at": expires_at,
@@ -427,6 +492,8 @@ class RefreshTokenStorage:
                "scopes": scopes,
            }
        except Exception as e:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "error")
            logger.error(f"Failed to decrypt refresh token for user {user_id}: {e}")
            return None

@@ -521,25 +588,34 @@ class RefreshTokenStorage:
        if not self._initialized:
            await self.initialize()

-        async with aiosqlite.connect(self.db_path) as db:
-            cursor = await db.execute(
-                "DELETE FROM refresh_tokens WHERE user_id = ?",
-                (user_id,),
-            )
-            await db.commit()
-            deleted = cursor.rowcount > 0
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                cursor = await db.execute(
+                    "DELETE FROM refresh_tokens WHERE user_id = ?",
+                    (user_id,),
+                )
+                await db.commit()
+                deleted = cursor.rowcount > 0

-        if deleted:
-            logger.info(f"Deleted refresh token for user {user_id}")
-            await self._audit_log(
-                event="delete_refresh_token",
-                user_id=user_id,
-                auth_method="offline_access",
-            )
-        else:
-            logger.debug(f"No refresh token to delete for user {user_id}")
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "success")

-        return deleted
+            if deleted:
+                logger.info(f"Deleted refresh token for user {user_id}")
+                await self._audit_log(
+                    event="delete_refresh_token",
+                    user_id=user_id,
+                    auth_method="offline_access",
+                )
+            else:
+                logger.debug(f"No refresh token to delete for user {user_id}")
+
+            return deleted
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "error")
+            raise

    async def get_all_user_ids(self) -> list[str]:
        """
@@ -1104,6 +1180,123 @@ class RefreshTokenStorage:

        return deleted

+    # ============================================================================
+    # Webhook Registration Tracking (both BasicAuth and OAuth modes)
+    # ============================================================================
+
+    async def store_webhook(self, webhook_id: int, preset_id: str) -> None:
+        """
+        Store registered webhook ID for tracking.
+
+        The statement is INSERT OR REPLACE and webhook_id is UNIQUE, so storing an
+        already-tracked webhook_id replaces the existing row instead of failing.
+
+        Args:
+            webhook_id: Nextcloud webhook ID
+            preset_id: Preset identifier (e.g., "notes_sync", "calendar_sync")
+
+        Raises:
+            Exception: Database errors are reported as a failed "insert" operation
+                and then re-raised unchanged.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        # Time and report the operation the same way the refresh-token methods do
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                await db.execute(
+                    "INSERT OR REPLACE INTO registered_webhooks (webhook_id, preset_id, created_at) VALUES (?, ?, ?)",
+                    (webhook_id, preset_id, time.time()),
+                )
+                await db.commit()
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "insert", duration, "error")
+            raise
+
+        duration = time.time() - start_time
+        record_db_operation("sqlite", "insert", duration, "success")
+
+        logger.debug(f"Stored webhook {webhook_id} for preset '{preset_id}'")
+
+    async def get_webhooks_by_preset(self, preset_id: str) -> list[int]:
+        """
+        Get all webhook IDs registered for a preset.
+
+        Args:
+            preset_id: Preset identifier
+
+        Returns:
+            List of webhook IDs (empty when nothing is tracked for the preset)
+
+        Raises:
+            Exception: Database errors are reported as a failed "select" operation
+                and then re-raised unchanged.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        # Time and report the operation the same way the refresh-token methods do
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                # Context-managed cursor is closed as soon as the rows are read
+                async with db.execute(
+                    "SELECT webhook_id FROM registered_webhooks WHERE preset_id = ?",
+                    (preset_id,),
+                ) as cursor:
+                    rows = await cursor.fetchall()
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "error")
+            raise
+
+        duration = time.time() - start_time
+        record_db_operation("sqlite", "select", duration, "success")
+
+        return [row[0] for row in rows]
+
+    async def delete_webhook(self, webhook_id: int) -> bool:
+        """
+        Remove webhook from tracking.
+
+        Args:
+            webhook_id: Nextcloud webhook ID to remove
+
+        Returns:
+            True if webhook was deleted, False if not found
+
+        Raises:
+            Exception: Database errors are reported as a failed "delete" operation
+                and then re-raised unchanged.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        # Time and report the operation the same way the refresh-token methods do
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                cursor = await db.execute(
+                    "DELETE FROM registered_webhooks WHERE webhook_id = ?", (webhook_id,)
+                )
+                await db.commit()
+                # rowcount is the number of rows the DELETE removed
+                deleted = cursor.rowcount > 0
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "error")
+            raise
+
+        duration = time.time() - start_time
+        record_db_operation("sqlite", "delete", duration, "success")
+
+        if deleted:
+            logger.debug(f"Deleted webhook {webhook_id} from tracking")
+
+        return deleted
+
+    async def list_all_webhooks(self) -> list[dict]:
+        """
+        List all tracked webhooks with metadata.
+
+        Returns:
+            List of webhook dictionaries with keys: webhook_id, preset_id, created_at,
+            ordered by created_at descending (most recently stored first)
+
+        Raises:
+            Exception: Database errors are reported as a failed "select" operation
+                and then re-raised unchanged.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        # Time and report the operation the same way the refresh-token methods do
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                # Context-managed cursor is closed as soon as the rows are read
+                async with db.execute(
+                    "SELECT webhook_id, preset_id, created_at FROM registered_webhooks ORDER BY created_at DESC"
+                ) as cursor:
+                    rows = await cursor.fetchall()
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "error")
+            raise
+
+        duration = time.time() - start_time
+        record_db_operation("sqlite", "select", duration, "success")
+
+        return [
+            {"webhook_id": row[0], "preset_id": row[1], "created_at": row[2]}
+            for row in rows
+        ]
+
+    async def clear_preset_webhooks(self, preset_id: str) -> int:
+        """
+        Delete all webhooks for a preset (bulk operation).
+
+        Args:
+            preset_id: Preset identifier
+
+        Returns:
+            Number of webhooks deleted
+
+        Raises:
+            Exception: Database errors are reported as a failed "delete" operation
+                and then re-raised unchanged.
+        """
+        if not self._initialized:
+            await self.initialize()
+
+        # Time and report the operation the same way the refresh-token methods do
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                cursor = await db.execute(
+                    "DELETE FROM registered_webhooks WHERE preset_id = ?", (preset_id,)
+                )
+                await db.commit()
+                # rowcount is the number of rows the DELETE removed
+                deleted = cursor.rowcount
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "error")
+            raise
+
+        duration = time.time() - start_time
+        record_db_operation("sqlite", "delete", duration, "success")
+
+        if deleted > 0:
+            logger.debug(f"Cleared {deleted} webhook(s) for preset '{preset_id}'")
+
+        return deleted
+

 async def generate_encryption_key() -> str:
    """
@@ -1117,7 +1310,7 @@ async def generate_encryption_key() -> str:

 # Example usage
 if __name__ == "__main__":
-    import asyncio
+    import anyio

    async def main():
        # Generate a key for testing
@@ -1125,4 +1318,4 @@ if __name__ == "__main__":
        print(f"Generated encryption key: {key}")
        print(f"Set this in your environment: export TOKEN_ENCRYPTION_KEY='{key}'")

-    asyncio.run(main())
+    anyio.run(main)
@@ -0,0 +1,524 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta name="apple-mobile-web-app-capable" content="yes">
+    <meta name="theme-color" content="#0082c9">
+    <title>{% block title %}Nextcloud MCP Server{% endblock %}</title>
+
+    <!-- Favicon -->
+    <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' width='32' height='32' viewBox='0 0 512 512'><rect width='512' height='512' rx='80' ry='80' fill='%230082C9'/><path d='M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 
8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z' fill='%23fff'/></svg>">
+
+    <!-- Open Sans font -->
+    <style>
+        @font-face {
+            font-family: 'Open Sans';
+            font-style: normal;
+            font-weight: normal;
+            src: local('Open Sans'), local('OpenSans');
+        }
+        @font-face {
+            font-family: 'Open Sans';
+            font-style: normal;
+            font-weight: bold;
+            src: local('Open Sans Semibold'), local('OpenSans-Semibold');
+        }
+    </style>
+
+    {% block extra_head %}{% endblock %}
+
+    <style>
+        /* Nextcloud App Design System */
+
+        /* CSS Variables */
+        :root {
+            /* Primary Colors */
+            --color-primary: #00679e;
+            --color-primary-element: #00679e;
+            --color-primary-light: #e5eff5;
+            --color-primary-element-light: #e5eff5;
+
+            /* Background Colors */
+            --color-main-background: #ffffff;
+            --color-background-dark: #ededed;
+            --color-background-hover: #f5f5f5;
+
+            /* Text Colors */
+            --color-main-text: #222222;
+            --color-text-maxcontrast: #6b6b6b;
+            --color-text-light: #767676;
+
+            /* Border Colors */
+            --color-border: #ededed;
+            --color-border-dark: #dbdbdb;
+
+            /* Borders & Radius */
+            --border-radius: 3px;
+            --border-radius-large: 10px;
+            --border-radius-pill: 100px;
+
+            /* Spacing */
+            --default-grid-baseline: 4px;
+            --default-clickable-area: 44px;
+        }
+
+        /* SVG Icon Styles */
+        .nav-icon {
+            width: 20px;
+            height: 20px;
+            display: inline-block;
+            fill: var(--color-main-text);
+            opacity: 0.7;
+        }
+
+        .app-navigation-entry.active .nav-icon {
+            fill: var(--color-primary-element);
+            opacity: 1;
+        }
+
+        /* General */
+        * {
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+            color: var(--color-main-text);
+            background: var(--color-main-background);
+            margin: 0;
+            padding: 0;
+        }
+
+        h1, h2, h3 {
+            font-weight: 300;
+            line-height: 1.2;
+        }
+
+        h1 {
+            font-size: 32px;
+            margin: 0 0 20px 0;
+            color: var(--color-main-text);
+        }
+
+        h2 {
+            font-size: 20px;
+            margin: 20px 0 12px 0;
+            color: var(--color-main-text);
+            border-bottom: 1px solid var(--color-border);
+            padding-bottom: 8px;
+        }
+
+        h3 {
+            font-size: 16px;
+            margin: 16px 0 8px 0;
+            color: var(--color-main-text);
+            font-weight: 500;
+        }
+
+        img {
+            max-width: 100%;
+        }
+
+        /* App Header (simplified, no full menu) */
+        .app-header {
+            height: 50px;
+            background: var(--color-primary-element);
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            position: sticky;
+            top: 0;
+            z-index: 100;
+            display: flex;
+            align-items: center;
+            padding: 0 20px;
+        }
+
+        .app-header__brand {
+            color: white;
+            font-size: 18px;
+            font-weight: 600;
+            text-decoration: none;
+            display: flex;
+            align-items: center;
+            gap: 12px;
+        }
+
+        .app-header__brand:hover {
+            opacity: 0.9;
+        }
+
+        .app-header__logo {
+            height: 32px;
+            width: 32px;
+            fill: white;
+        }
+
+        /* App Layout */
+        .app-content-wrapper {
+            display: flex;
+            height: calc(100vh - 50px);
+            overflow: hidden;
+        }
+
+        /* Side Navigation */
+        #app-navigation {
+            width: 250px;
+            background: var(--color-main-background);
+            border-right: 1px solid var(--color-border);
+            display: flex;
+            flex-direction: column;
+            flex-shrink: 0;
+            transition: margin-left 0.3s ease;
+        }
+
+        #app-navigation.app-navigation--closed {
+            margin-left: -250px;
+        }
+
+        .app-navigation__content {
+            flex: 1;
+            overflow-y: auto;
+            padding: 8px;
+            display: flex;
+            flex-direction: column;
+        }
+
+        .app-navigation-list {
+            list-style: none;
+            padding: 0;
+            margin: 0;
+            flex: 1;
+        }
+
+        .app-navigation-entry {
+            position: relative;
+            margin-bottom: 2px;
+        }
+
+        .app-navigation-entry__wrapper {
+            display: flex;
+            align-items: center;
+            position: relative;
+        }
+
+        .app-navigation-entry-link {
+            display: flex;
+            align-items: center;
+            padding: 0 8px;
+            min-height: var(--default-clickable-area);
+            border-radius: var(--border-radius);
+            transition: background-color 100ms ease-in-out;
+            text-decoration: none;
+            color: var(--color-main-text);
+            flex: 1;
+            font-size: 14px;
+        }
+
+        .app-navigation-entry-link:hover {
+            background-color: var(--color-background-hover);
+        }
+
+        .app-navigation-entry.active .app-navigation-entry-link {
+            background-color: var(--color-primary-element-light);
+            font-weight: 500;
+        }
+
+        .app-navigation-entry-icon {
+            width: var(--default-clickable-area);
+            height: var(--default-clickable-area);
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            margin-right: 0;
+        }
+
+        .app-navigation-entry__name {
+            flex: 1;
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+
+        .app-navigation-entry__counter {
+            margin-left: auto;
+            padding: 2px 6px;
+            border-radius: var(--border-radius-pill);
+            background-color: var(--color-background-dark);
+            font-size: 11px;
+            color: var(--color-text-maxcontrast);
+            min-width: 20px;
+            text-align: center;
+        }
+
+        .app-navigation__settings {
+            list-style: none;
+            padding: 8px 0 0 0;
+            margin: 8px 0 0 0;
+            border-top: 1px solid var(--color-border);
+            flex-shrink: 0;
+        }
+
+        .app-navigation-toggle {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            position: fixed;
+            top: 60px;
+            left: 10px;
+            z-index: 110;
+            background: var(--color-main-background);
+            border: 1px solid var(--color-border);
+            border-radius: var(--border-radius);
+            padding: 8px 12px;
+            cursor: pointer;
+            box-shadow: 0 0 5px rgba(0,0,0,0.1);
+            transition: left 0.3s ease;
+        }
+
+        .app-navigation-toggle:hover {
+            background: var(--color-background-hover);
+        }
+
+        #app-navigation:not(.app-navigation--closed) ~ * .app-navigation-toggle {
+            left: 260px;
+        }
+
+        /* Main Content Area */
+        #app-content {
+            flex: 1;
+            overflow-y: auto;
+            background: var(--color-main-background);
+        }
+
+        .page-content {
+            max-width: 1000px;
+            margin: 0 auto;
+            padding: 24px;
+        }
+
+        .content-section {
+            background: var(--color-main-background);
+            border-radius: 0;
+            padding: 0;
+            box-shadow: none;
+        }
+
+        .content-section h1 {
+            font-size: 24px;
+            font-weight: 600;
+            margin-bottom: 24px;
+        }
+
+        .content-section h2 {
+            font-size: 18px;
+            font-weight: 500;
+            margin: 24px 0 12px 0;
+            border-bottom: none;
+            padding-bottom: 0;
+        }
+
+        .content-section h3 {
+            font-size: 16px;
+            font-weight: 500;
+        }
+
+        /* Responsive */
+        @media (max-width: 768px) {
+            #app-navigation {
+                position: fixed;
+                height: calc(100vh - 50px);
+                z-index: 105;
+                box-shadow: 2px 0 8px rgba(0,0,0,0.1);
+            }
+
+            .page-content {
+                padding: 16px;
+            }
+        }
+
+        /* Footer: dark (#0f0833) band with white text and a 60px top margin; inner content is capped at 1200px and centred */
+        footer.page-footer {
+            margin-top: 60px;
+            padding: 40px 0;
+            color: #fff;
+            background-color: #0f0833;
+        }
+
+        footer.page-footer .bootstrap-container {
+            margin: 0 auto;
+            padding: 0 20px;
+            max-width: 1200px;
+        }
+
+        footer.page-footer h1 {
+            margin-top: 20px;
+            color: #fff;
+            font-size: 15px;
+            font-weight: 700;
+            line-height: 1.8;
+        }
+
+        footer.page-footer ul {
+            padding-left: 0;
+            list-style-type: none;
+        }
+
+        footer.page-footer li {
+            margin-top: 0;
+            color: #fff;
+            font-size: 13px;
+            line-height: 1.8;
+        }
+
+        footer.page-footer li a {
+            display: block;
+            padding: 4px 0;
+            color: #fff;
+            text-decoration: none;
+        }
+
+        footer.page-footer li a:hover {
+            text-decoration: underline;
+        }
+
+        footer.page-footer p {
+            color: #fff;
+            font-size: 15px;
+            line-height: 1.8;
+        }
+
+        footer.page-footer p.copyright {
+            margin-top: 30px;
+            color: rgba(255, 255, 255, .5);
+            font-size: 13px;
+            text-align: center;
+        }
+
+        /* Buttons: .btn is the borderless pill-shaped base (50px radius); .btn-primary adds the blue fill and border */
+        .btn {
+            display: inline-block;
+            padding: 10px 20px;
+            border: none;
+            border-radius: 50px;
+            font-size: 14px;
+            text-decoration: none;
+            cursor: pointer;
+            transition: all .3s;
+        }
+
+        .btn-primary {
+            color: #fff;
+            background: #0082c9;
+            border: 1px solid #0062c9;
+        }
+
+        .btn-primary:hover {
+            background: #006ba3;
+        }
+
+        /* Tables: full-width, collapsed borders, one bottom rule per cell; the first cell of a row is 180px wide. Inline code styling follows. */
+        table {
+            margin: 20px 0;
+            width: 100%;
+            border-collapse: collapse;
+        }
+
+        td {
+            padding: 12px 8px;
+            font-size: 14px;
+            border-bottom: 1px solid var(--color-border);
+        }
+
+        td:first-child {
+            width: 180px;
+            font-weight: 500;
+            color: var(--color-text-maxcontrast);
+        }
+
+        code {
+            padding: 2px 6px;
+            border-radius: var(--border-radius);
+            color: var(--color-main-text);
+            background-color: var(--color-background-dark);
+            font-size: 90%;
+            font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace;
+        }
+
+        /* Badges: small bold uppercase pills; .badge-oauth is green, .badge-basic is blue, both with white text */
+        .badge {
+            display: inline-block;
+            border-radius: 12px;
+            padding: 3px 8px;
+            text-transform: uppercase;
+            font-weight: 700;
+            font-size: 12px;
+        }
+
+        .badge-oauth {
+            color: #fff;
+            background-color: #4caf50;
+        }
+
+        .badge-basic {
+            color: #fff;
+            background-color: #2196f3;
+        }
+
+        /* Messages: tinted boxes. warning / info-message / error share a 4px left accent; success is a centred, fully bordered panel */
+        .warning {
+            margin: 15px 0;
+            padding: 15px;
+            border-left: 4px solid #ffc107;
+            color: #856404;
+            background-color: #fff3cd;
+        }
+
+        .info-message {
+            margin: 15px 0;
+            padding: 15px;
+            border-left: 4px solid #2196f3;
+            color: #1565c0;
+            background-color: #e3f2fd;
+        }
+
+        .error {
+            margin: 15px 0;
+            padding: 15px;
+            border-left: 4px solid #d32f2f;
+            color: #c62828;
+            background-color: #ffebee;
+        }
+
+        .success {
+            padding: 30px;
+            border: 2px solid #4caf50;
+            border-radius: 8px;
+            text-align: center;
+            background-color: #e8f5e9;
+        }
+
+        .success h1 {
+            color: #4caf50;
+        }
+
+        {% block extra_styles %}{% endblock %}
+    </style>
+</head>
+<body>
+    <!-- App Header -->
+    <header class="app-header">
+        <a href="/app" class="app-header__brand">
+            <svg class="app-header__logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512">
+                <path d="M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 
4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z" fill="#fff"/>
+            </svg>
+            <span>Nextcloud MCP Server</span>
+        </a>
+    </header>
+
+    <!-- Page body: child templates override "content" (the app page supplies the sidebar + main content wrapper here) -->
+    {% block content %}{% endblock %}
+
+    {% block scripts %}{% endblock %}
+</body>
+</html>
@@ -0,0 +1,25 @@
+{% extends "base.html" %}
+{#
+  Generic error page.
+  Context read below: error_title (heading and <title>; falls back to "Error" when missing or empty),
+  error_message (text of the alert box), login_url / back_url (each renders a button only when set).
+#}
+
+{% block title %}{{ error_title|default('Error', true) }} - Nextcloud MCP Server{% endblock %}
+
+{% block content %}
+<h1>{{ error_title|default('Error', true) }}</h1>
+
+{# role="alert" lets assistive technology announce the message without the user having to find it #}
+<div class="error" role="alert">
+    <strong>Error:</strong> {{ error_message }}
+</div>
+
+{% if login_url %}
+<p><a href="{{ login_url }}" class="btn btn-primary">Login again</a></p>
+{% endif %}
+
+{% if back_url %}
+<p><a href="{{ back_url }}" class="btn">Go Back</a></p>
+{% endif %}
+{% endblock %}
@@ -0,0 +1,28 @@
+{% extends "base.html" %}
+{#
+  Success page with an optional automatic redirect.
+  Context read below: success_title, success_messages, redirect_url, redirect_delay
+  (redirect_delay is the meta-refresh delay, which HTML defines in seconds).
+#}
+
+{% block title %}{{ success_title|default('Success', true) }} - Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+{# A delay of 0 means "redirect immediately" but is falsy, so it is accepted explicitly #}
+{% if redirect_url and (redirect_delay or redirect_delay == 0) %}
+<meta http-equiv="refresh" content="{{ redirect_delay }};url={{ redirect_url }}">
+{% endif %}
+{% endblock %}
+
+{% block content %}
+<div class="success">
+    <h1>{{ success_title|default('✓ Success', true) }}</h1>
+    {% for message in success_messages %}
+    <p>{{ message }}</p>
+    {% endfor %}
+    {# NOTE(review): shown whenever redirect_url is set, even if no refresh tag was emitted in the head; confirm callers always pass a delay too #}
+    {% if redirect_url %}
+    <p>Redirecting...</p>
+    {% endif %}
+</div>
+{% endblock %}
@@ -0,0 +1,650 @@
+{% extends "base.html" %}
+
+{% block title %}Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+    <!-- htmx for dynamic loading -->
+    <script src="https://unpkg.com/htmx.org@1.9.10"></script>
+
+    <!-- Alpine.js for state management (exact version, like the other CDN scripts; "@3.x.x" floats to the newest 3.x release) -->
+    <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.14.9/dist/cdn.min.js"></script>
+
+    <!-- Plotly.js for vector visualization -->
+    <script src="https://cdn.plot.ly/plotly-3.3.0.min.js"></script>
+
+    <!-- Vector Viz static assets -->
+    <link rel="stylesheet" href="/app/static/vector-viz.css">
+{% endblock %}
+
+{% block extra_styles %}
+    /* Classes htmx applies while swapping and settling content: they drive a 200ms opacity fade */
+    .htmx-swapping {
+        transition: opacity 200ms ease-out;
+        opacity: 0;
+    }
+
+    .htmx-settling {
+        transition: opacity 200ms ease-in;
+        opacity: 1;
+    }
+
+    /* Logout area: separated from what precedes it by spacing and a top border */
+    .logout-section {
+        border-top: 1px solid var(--color-border);
+        padding-top: 20px;
+        margin-top: 20px;
+    }
+
+    /* Welcome tab: hero banner, feature cards, info panels and the authentication status strip */
+    .hero-section {
+        margin: -24px -24px 40px;
+        padding: 60px 24px;
+        border-radius: 0 0 var(--border-radius-large) var(--border-radius-large);
+        text-align: center;
+        color: #fff;
+        background: linear-gradient(135deg, var(--color-primary-element) 0%, #0082c9 100%);
+    }
+
+    .hero-section h1 {
+        margin: 0 0 16px;
+        font-size: 36px;
+        font-weight: 600;
+        color: #fff;
+    }
+
+    .hero-section p {
+        margin: 0 auto;
+        max-width: 700px;
+        font-size: 18px;
+        line-height: 1.6;
+        opacity: .95;
+    }
+
+    .feature-grid {
+        display: grid;
+        gap: 24px;
+        grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+        margin: 32px 0;
+    }
+
+    .feature-card {
+        display: block;
+        padding: 24px;
+        border: 2px solid var(--color-border);
+        border-radius: var(--border-radius-large);
+        color: inherit;
+        background: var(--color-main-background);
+        text-decoration: none;
+        cursor: pointer;
+        transition: all .2s;
+    }
+
+    .feature-card:hover {
+        transform: translateY(-2px);
+        box-shadow: 0 4px 12px rgba(0, 103, 158, .15);
+        border-color: var(--color-primary-element);
+    }
+
+    .feature-card h3 {
+        display: flex;
+        gap: 12px;
+        align-items: center;
+        margin: 12px 0 8px;
+        font-size: 20px;
+        font-weight: 600;
+        color: var(--color-primary-element);
+    }
+
+    .feature-card p {
+        margin: 8px 0 0;
+        font-size: 14px;
+        line-height: 1.6;
+        color: var(--color-text-maxcontrast);
+    }
+
+    .feature-icon {
+        display: flex;
+        justify-content: center;
+        align-items: center;
+        width: 48px;
+        height: 48px;
+        margin-bottom: 8px;
+        border-radius: var(--border-radius);
+        background: var(--color-primary-element-light);
+    }
+
+    .feature-icon svg {
+        fill: var(--color-primary-element);
+        width: 28px;
+        height: 28px;
+    }
+
+    .info-section {
+        margin: 32px 0;
+        padding: 32px;
+        border-radius: var(--border-radius-large);
+        background: var(--color-background-hover);
+    }
+
+    .info-section h2 {
+        margin: 0 0 16px;
+        padding: 0;
+        border: none;
+        font-size: 24px;
+        color: var(--color-main-text);
+    }
+
+    .info-section p {
+        margin: 12px 0;
+        line-height: 1.7;
+        color: var(--color-text-maxcontrast);
+    }
+
+    .info-section ul {
+        padding-left: 24px;
+        margin: 12px 0;
+    }
+
+    .info-section li {
+        margin: 8px 0;
+        line-height: 1.7;
+        color: var(--color-text-maxcontrast);
+    }
+
+    .info-section code {
+        padding: 2px 8px;
+        border-radius: var(--border-radius);
+        font-size: 13px;
+        background: var(--color-main-background);
+    }
+
+    .auth-status {
+        display: flex;
+        gap: 12px;
+        align-items: center;
+        margin: 24px 0;
+        padding: 16px 20px;
+        border-left: 4px solid var(--color-primary-element);
+        border-radius: var(--border-radius);
+        background: var(--color-primary-element-light);
+    }
+
+    .auth-status svg {
+        flex-shrink: 0;
+        fill: var(--color-primary-element);
+        width: 24px;
+        height: 24px;
+    }
+
+    .auth-status-text {
+        flex: 1;
+    }
+
+    .auth-status-text strong {
+        display: block;
+        margin-bottom: 4px;
+        font-size: 14px;
+        color: var(--color-main-text);
+    }
+
+    .auth-status-text span {
+        font-size: 13px;
+        color: var(--color-text-maxcontrast);
+    }
+{% endblock %}
+
+{% block content %}
+<div class="app-content-wrapper" x-data="{ activeSection: 'welcome', navOpen: true }">
+    <!-- Side Navigation -->
+    <nav id="app-navigation" :class="{ 'app-navigation--closed': !navOpen }">
+        <div class="app-navigation__content">
+            <!-- Navigation List -->
+            <ul class="app-navigation-list">
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'welcome' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'welcome'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M10,20V14H14V20H19V12H22L12,3L2,12H5V20H10Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Welcome</span>
+                        </a>
+                    </div>
+                </li>
+
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'user-info' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'user-info'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">User Info</span>
+                        </a>
+                    </div>
+                </li>
+
+                {% if show_vector_sync_tab %}
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'vector-sync' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'vector-sync'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Vector Sync</span>
+                        </a>
+                    </div>
+                </li>
+
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'vector-viz' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'vector-viz'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Vector Viz</span>
+                        </a>
+                    </div>
+                </li>
+                {% endif %}
+
+                {% if show_webhooks_tab %}
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'webhooks' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'webhooks'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M10.59,13.41C11,13.8 11,14.44 10.59,14.83C10.2,15.22 9.56,15.22 9.17,14.83C7.22,12.88 7.22,9.71 9.17,7.76V7.76L12.71,4.22C14.66,2.27 17.83,2.27 19.78,4.22C21.73,6.17 21.73,9.34 19.78,11.29L18.29,12.78C18.3,11.96 18.17,11.14 17.89,10.36L18.36,9.88C19.54,8.71 19.54,6.81 18.36,5.64C17.19,4.46 15.29,4.46 14.12,5.64L10.59,9.17C9.41,10.34 9.41,12.24 10.59,13.41M13.41,9.17C13.8,8.78 14.44,8.78 14.83,9.17C16.78,11.12 16.78,14.29 14.83,16.24V16.24L11.29,19.78C9.34,21.73 6.17,21.73 4.22,19.78C2.27,17.83 2.27,14.66 4.22,12.71L5.71,11.22C5.7,12.04 5.83,12.86 6.11,13.65L5.64,14.12C4.46,15.29 4.46,17.19 5.64,18.36C6.81,19.54 8.71,19.54 9.88,18.36L13.41,14.83C14.59,13.66 14.59,11.76 13.41,10.59C13,10.2 13,9.56 13.41,9.17Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Webhooks</span>
+                        </a>
+                    </div>
+                </li>
+                {% endif %}
+            </ul>
+
+            <!-- Settings/Logout at bottom -->
+            {% if logout_url %}
+            <ul class="app-navigation__settings">
+                <li class="app-navigation-entry">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="{{ logout_url }}" class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M16,17V14H9V10H16V7L21,12L16,17M14,2A2,2 0 0,1 16,4V6H14V4H5V20H14V18H16V20A2,2 0 0,1 14,22H5A2,2 0 0,1 3,20V4A2,2 0 0,1 5,2H14Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Logout</span>
+                        </a>
+                    </div>
+                </li>
+            </ul>
+            {% endif %}
+        </div>
+
+        <!-- Toggle Button (mobile) -->
+        <button @click="navOpen = !navOpen"
+                class="app-navigation-toggle"
+                :aria-expanded="navOpen.toString()">
+            ☰
+        </button>
+    </nav>
+
+    <!-- Main Content Area -->
+    <main id="app-content">
+        <div class="page-content">
+            <!-- Welcome Section -->
+            <div x-show="activeSection === 'welcome'">
+                <!-- Hero Section -->
+                <div class="hero-section">
+                    <h1>Welcome to Nextcloud MCP Server</h1>
+                    <p>
+                        Interactive user interface for semantic search and document retrieval.
+                        Test queries, visualize results, and explore your Nextcloud content using RAG workflows.
+                    </p>
+                </div>
+
+                <!-- Authentication Status -->
+                <div class="auth-status">
+                    <svg viewBox="0 0 24 24">
+                        <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                    </svg>
+                    <div class="auth-status-text">
+                        <strong>Authenticated as: {{ username }}</strong>
+                        <span>Authentication mode: <code>{{ auth_mode }}</code></span>
+                    </div>
+                </div>
+
+                {% if vector_sync_enabled %}
+                <!-- Vector Sync Enabled Content -->
+                <div class="info-section">
+                    <h2>About Semantic Search</h2>
+                    <p>
+                        This interface provides access to <strong>semantic search</strong> capabilities powered by vector embeddings.
+                        Unlike traditional keyword search, semantic search understands the <em>meaning</em> of your queries and finds
+                        conceptually similar content across your Nextcloud apps.
+                    </p>
+                    <p>
+                        <strong>How it works:</strong>
+                    </p>
+                    <ul>
+                        <li>Documents from Notes, Calendar, Files, Contacts, and Deck are indexed into a vector database</li>
+                        <li>Each document chunk is converted to a 768-dimensional vector embedding that captures semantic meaning</li>
+                        <li>Queries are also converted to embeddings and matched against document vectors using similarity search</li>
+                        <li>Results can be retrieved using pure semantic search or hybrid BM25 search combining keywords and semantics</li>
+                    </ul>
+                </div>
+
+                <div class="info-section">
+                    <h2>RAG Workflow Integration</h2>
+                    <p>
+                        This UI allows you to <strong>test the same queries that Large Language Models (LLMs) would use</strong> in a
+                        Retrieval-Augmented Generation (RAG) workflow. When an AI assistant needs to answer questions about your data:
+                    </p>
+                    <ul>
+                        <li><strong>Step 1:</strong> The assistant converts your question into a search query</li>
+                        <li><strong>Step 2:</strong> The MCP server retrieves relevant document chunks using semantic search</li>
+                        <li><strong>Step 3:</strong> Retrieved context is passed to the LLM to generate an informed answer</li>
+                    </ul>
+
+                    <!-- RAG Workflow Diagram -->
+                    <div style="background: var(--color-main-background); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 24px; margin: 24px 0; overflow-x: auto;">
+                        <div style="text-align: center; font-weight: 600; margin-bottom: 20px; color: var(--color-primary-element); font-size: 16px;">
+                            MCP Sampling RAG Workflow
+                        </div>
+
+                        <!-- Four-component bidirectional flow -->
+                        <div style="max-width: 1000px; margin: 0 auto;">
+                            <div style="display: grid; grid-template-columns: 0.7fr auto 1fr auto 1fr auto 0.9fr; gap: 10px; align-items: center;">
+                                <!-- User -->
+                                <div style="background: var(--color-background-hover); border: 2px solid var(--color-border); border-radius: var(--border-radius-large); padding: 14px; text-align: center;">
+                                    <div style="font-size: 26px; margin-bottom: 5px;">👤</div>
+                                    <div style="font-weight: 600; color: var(--color-main-text); font-size: 12px;">User</div>
+                                    <div style="font-size: 9px; color: var(--color-text-maxcontrast); font-style: italic; margin-top: 5px; line-height: 1.2;">
+                                        "What are health<br>benefits of coffee?"
+                                    </div>
+                                </div>
+
+                                <!-- Arrow User <-> Client -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-text-maxcontrast);">↔</div>
+                                </div>
+
+                                <!-- MCP Client + LLM (combined) -->
+                                <div style="background: var(--color-primary-element-light); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 12px; text-align: center;">
+                                    <div style="font-weight: 600; color: var(--color-primary-element); font-size: 13px; margin-bottom: 8px;">MCP Client + LLM</div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 8px; margin-bottom: 6px;">
+                                        <div style="font-size: 9px; color: var(--color-text-maxcontrast);">(Claude Code)</div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 8px; border: 2px solid var(--color-primary-element);">
+                                        <div style="font-size: 16px; margin-bottom: 2px;">🧠</div>
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 10px;">Client's LLM</div>
+                                        <div style="font-size: 8px; color: var(--color-text-maxcontrast);">(Claude)</div>
+                                    </div>
+
+                                    <div style="margin-top: 8px; font-size: 8px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                        <strong>Enables RAG:</strong><br>
+                                        Receives context,<br>
+                                        generates answer
+                                    </div>
+                                </div>
+
+                                <!-- Arrow Client <-> Server -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-primary-element);">↔</div>
+                                    <div style="font-size: 7px; color: var(--color-text-maxcontrast); margin-top: 2px; font-weight: 600; line-height: 1.1;">
+                                        Query +<br>
+                                        Sampling
+                                    </div>
+                                </div>
+
+                                <!-- MCP Server -->
+                                <div style="background: var(--color-primary-element-light); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 12px; text-align: center;">
+                                    <div style="font-weight: 600; color: var(--color-primary-element); font-size: 13px; margin-bottom: 8px;">MCP Server</div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">1. Semantic Search</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Vector embeddings<br>
+                                            BM25 Hybrid + RRF
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">2. Retrieve Context</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Top relevant docs<br>
+                                            with scores
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">3. Format Response</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Document chunks<br>
+                                            with citations
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">4. Send to LLM</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Via MCP sampling<br>
+                                            for answer generation
+                                        </div>
+                                    </div>
+                                </div>
+
+                                <!-- Arrow Server <-> Nextcloud -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-primary-element);">↔</div>
+                                    <div style="font-size: 7px; color: var(--color-text-maxcontrast); margin-top: 2px; font-weight: 600; line-height: 1.1;">
+                                        Retrieve
+                                    </div>
+                                </div>
+
+                                <!-- Nextcloud -->
+                                <div style="background: var(--color-background-hover); border: 2px solid var(--color-border); border-radius: var(--border-radius-large); padding: 12px; text-align: center; position: relative;">
+                                    <img src="/app/static/nextcloud-logo.png" alt="Nextcloud" style="width: 40px; height: 40px; margin-bottom: 6px;" />
+                                    <div style="font-weight: 600; color: var(--color-main-text); font-size: 12px; margin-bottom: 4px;">Nextcloud</div>
+                                    <div style="font-size: 8px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                        Notes, Calendar,<br>
+                                        Files, Contacts,<br>
+                                        Deck
+                                    </div>
+                                </div>
+                            </div>
+
+                            <!-- Explanation below diagram -->
+                            <div style="margin-top: 24px; padding: 16px; background: var(--color-background-hover); border-radius: var(--border-radius); border-left: 4px solid var(--color-primary-element);">
+                                <div style="font-size: 12px; color: var(--color-main-text); line-height: 1.6;">
+                                    <strong>How RAG works via MCP Sampling:</strong>
+                                </div>
+                                <ol style="margin: 8px 0 0 0; padding-left: 20px; font-size: 11px; color: var(--color-text-maxcontrast); line-height: 1.6;">
+                                    <li>User asks question through MCP Client</li>
+                                    <li>Client sends query to MCP Server</li>
+                                    <li>Server retrieves relevant document context from Nextcloud</li>
+                                    <li><strong>Server sends context back to Client's LLM</strong> (MCP Sampling)</li>
+                                    <li>Client's LLM generates answer with citations using retrieved context</li>
+                                    <li>Answer returned to user</li>
+                                </ol>
+                                <div style="margin-top: 8px; font-size: 10px; color: var(--color-text-maxcontrast); font-style: italic;">
+                                    The server has no LLM - it only retrieves context. The client's existing LLM is reused for answer generation.
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+
+                    <p style="margin-top: 16px;">
+                        <strong>Key Point:</strong> The MCP server retrieves context but doesn't generate answers itself.
+                        Through <strong>MCP sampling</strong>, it requests the client's LLM to generate responses, giving users
+                        full control over which model is used and ensuring that answer generation happens client-side.
+                    </p>
+
+                    <p>
+                        By using this interface, you can preview search results, understand relevance scores, and verify
+                        that the system retrieves the right information before it reaches the LLM.
+                    </p>
+                </div>
+
+                <!-- Feature Cards -->
+                <h2>Available Features</h2>
+                <div class="feature-grid">
+                    <a href="#" @click.prevent="activeSection = 'user-info'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                            </svg>
+                        </div>
+                        <h3>User Information</h3>
+                        <p>
+                            View your authentication details, session information, and IdP profile.
+                            Manage background access permissions.
+                        </p>
+                    </a>
+
+                    <a href="#" @click.prevent="activeSection = 'vector-sync'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                            </svg>
+                        </div>
+                        <h3>Vector Sync Status</h3>
+                        <p>
+                            Monitor real-time indexing progress with metrics for indexed documents, pending queue,
+                            and synchronization status.
+                        </p>
+                    </a>
+
+                    <a href="#" @click.prevent="activeSection = 'vector-viz'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                            </svg>
+                        </div>
+                        <h3>Vector Visualization</h3>
+                        <p>
+                            Interactive search interface with 2D PCA visualization. Compare algorithms,
+                            view relevance scores, and explore matched document chunks.
+                        </p>
+                    </a>
+                </div>
+
+                {% else %}
+                <!-- Vector Sync Disabled Content -->
+                <div class="warning">
+                    <h3 style="margin-top: 0;">Vector Sync is Disabled</h3>
+                    <p>
+                        Semantic search and vector visualization features are currently disabled.
+                        To enable these features, set <code>VECTOR_SYNC_ENABLED=true</code> in your environment configuration.
+                    </p>
+                    <p style="margin-bottom: 0;">
+                        <strong>Learn more:</strong>
+                        <!-- rel="noopener noreferrer": the tab opened via target="_blank" gets no window.opener handle to this page and no Referer header -->
+                        <a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank" rel="noopener noreferrer" style="color: inherit; text-decoration: underline;">
+                            Configuration Guide
+                        </a>
+                    </p>
+                </div>
+
+                <!-- Limited Feature Card -->
+                <h2>Available Features</h2>
+                <div class="feature-grid">
+                    <a href="#" @click.prevent="activeSection = 'user-info'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                            </svg>
+                        </div>
+                        <h3>User Information</h3>
+                        <p>
+                            View your authentication details, session information, and IdP profile.
+                            Manage background access permissions.
+                        </p>
+                    </a>
+                </div>
+                {% endif %}
+
+                <!-- Documentation Section -->
+                <div class="info-section" style="margin-top: 40px;">
+                    <h2>Documentation</h2>
+                    <p>
+                        For detailed information about configuration, authentication modes, and advanced features,
+                        please refer to the project documentation:
+                    </p>
+                    <!-- External documentation links open in a new tab; rel="noopener noreferrer" keeps the
+                         opened page from reaching back through window.opener and omits the Referer header. -->
+                    <ul>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/installation.md" target="_blank" rel="noopener noreferrer">Installation Guide</a></li>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank" rel="noopener noreferrer">Configuration Options</a></li>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/authentication.md" target="_blank" rel="noopener noreferrer">Authentication Modes</a></li>
+                        {% if vector_sync_enabled %}
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/user-guide/vector-sync-ui.md" target="_blank" rel="noopener noreferrer">Vector Sync UI Guide</a></li>
+                        {% endif %}
+                    </ul>
+                </div>
+            </div>
+
+            <!-- User Info Section -->
+            <div x-show="activeSection === 'user-info'">
+                <div class="content-section">
+                    <h1>User Information</h1>
+                    {{ user_info_tab_html|safe }}
+                </div>
+            </div>
+
+            {# NOTE(review): these two sections render only when show_vector_sync_tab is truthy, while the
+               documentation link earlier in this template is gated on vector_sync_enabled and the feature
+               cards set activeSection to 'vector-sync' / 'vector-viz'. Presumably both flags always agree;
+               if they can differ, a card click would select a section that is not rendered. TODO confirm. #}
+            {% if show_vector_sync_tab %}
+            <!-- Vector Sync Section -->
+            <div x-show="activeSection === 'vector-sync'">
+                <div class="content-section">
+                    <h1>Vector Sync Status</h1>
+                    {# Pre-rendered tab markup supplied by the view; emitted unescaped via the safe filter. #}
+                    {{ vector_sync_tab_html|safe }}
+                </div>
+            </div>
+
+            <!-- Vector Viz Section -->
+            <div x-show="activeSection === 'vector-viz'">
+                <div class="content-section">
+                    <h1>Vector Visualization</h1>
+                    {# Placeholder: htmx issues GET /app/vector-viz on the load trigger and swaps the response
+                       in place of this element itself (outerHTML), so the loading text disappears with it. #}
+                    <div hx-get="/app/vector-viz" hx-trigger="load" hx-swap="outerHTML">
+                        <p style="color: #999;">Loading vector visualization...</p>
+                    </div>
+                </div>
+            </div>
+            {% endif %}
+
+            {% if show_webhooks_tab %}
+            <!-- Webhooks Section -->
+            <div x-show="activeSection === 'webhooks'">
+                <div class="content-section">
+                    <h1>Webhook Management</h1>
+                    {{ webhooks_tab_html|safe }}
+                </div>
+            </div>
+            {% endif %}
+        </div>
+    </main>
+</div>
+
+<script>
+    // Set global Nextcloud base URL for use in external JS.
+    // The tojson filter renders a correctly quoted and escaped JavaScript string literal, so a
+    // value containing quotes, backslashes, angle brackets, or ampersands can neither break out
+    // of the string nor be mangled by HTML escaping. `or ''` yields an empty string when the
+    // value is unset instead of failing to serialize.
+    window.NEXTCLOUD_BASE_URL = {{ (nextcloud_host_for_links or '')|tojson }};
+</script>
+<script src="/app/static/vector-viz.js"></script>
+{% endblock %}
@@ -0,0 +1,180 @@
+<div x-data="vizApp()">
+    <div class="viz-layout">
+        <!-- Top: Search Controls -->
+        <div class="viz-card viz-controls-card">
+            <form @submit.prevent="executeSearch">
+                <div class="viz-controls-grid">
+                    <!-- Each label is tied to its control with a for/id pair so assistive technology
+                         announces the field name and clicking the label focuses the control. -->
+                    <div class="viz-control-group">
+                        <label for="viz-query-input">Search Query</label>
+                        <input id="viz-query-input" type="text" x-model="query" placeholder="Enter search query..." required />
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label for="viz-algorithm-select">Algorithm</label>
+                        <select id="viz-algorithm-select" x-model="algorithm">
+                            <option value="semantic">Semantic (Dense)</option>
+                            <option value="bm25_hybrid" selected>BM25 Hybrid</option>
+                        </select>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label for="viz-fusion-select">Fusion</label>
+                        <!-- Fusion only applies to the hybrid algorithm, so the select is disabled and dimmed otherwise -->
+                        <select id="viz-fusion-select" x-model="fusion" :disabled="algorithm !== 'bm25_hybrid'" :style="algorithm !== 'bm25_hybrid' ? 'opacity: 0.5; cursor: not-allowed;' : ''">
+                            <option value="rrf" selected>RRF</option>
+                            <option value="dbsf">DBSF</option>
+                        </select>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>&nbsp;</label>
+                        <button type="submit" class="viz-btn">Search</button>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>&nbsp;</label>
+                        <button type="button" class="viz-btn-secondary" @click="showAdvanced = !showAdvanced">
+                            <span x-text="showAdvanced ? 'Hide' : 'Advanced'"></span>
+                        </button>
+                    </div>
+                </div>
+
+                <!-- Advanced Options (Collapsible) -->
+                <div x-show="showAdvanced" style="margin-top: 16px;">
+                    <div class="viz-controls-grid" style="grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));">
+                        <div class="viz-control-group">
+                            <label>Document Types</label>
+                            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 8px; font-size: 13px;">
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="" style="margin-right: 4px;">
+                                    <span>All</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="note" style="margin-right: 4px;">
+                                    <span>Notes</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="file" style="margin-right: 4px;">
+                                    <span>Files</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="calendar" style="margin-right: 4px;">
+                                    <span>Calendar</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="contact" style="margin-right: 4px;">
+                                    <span>Contacts</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="deck" style="margin-right: 4px;">
+                                    <span>Deck</span>
+                                </label>
+                            </div>
+                        </div>
+
+                        <!-- for/id pairs associate the visible labels with the numeric inputs for
+                             assistive technology and click-to-focus. -->
+                        <div class="viz-control-group">
+                            <label for="viz-score-threshold-input">Score Threshold</label>
+                            <input id="viz-score-threshold-input" type="number" x-model.number="scoreThreshold" min="0" max="1" step="any" />
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label for="viz-result-limit-input">Result Limit</label>
+                            <input id="viz-result-limit-input" type="number" x-model.number="limit" min="1" max="1000" />
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label>Display Options</label>
+                            <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal; margin-top: 4px;">
+                                <input type="checkbox" x-model="showQueryPoint" @change="updatePlot()" style="margin-right: 6px;">
+                                <span>Show Query Point</span>
+                            </label>
+                        </div>
+                    </div>
+                </div>
+            </form>
+        </div>
+
+        <!-- Plot -->
+        <div class="viz-card viz-card-plot">
+            <div id="viz-plot-container">
+                <div x-show="loading" class="viz-loading-overlay" x-transition.opacity.duration.200ms>
+                    Executing search and computing PCA projection...
+                </div>
+                <div id="viz-plot" x-show="!loading" x-transition.opacity.duration.200ms></div>
+            </div>
+        </div>
+
+        <!-- Results -->
+        <div class="viz-card" style="flex: 0 0 auto;">
+            <h3 style="margin-top: 0;">Search Results (<span x-text="loading ? '...' : results.length"></span>)</h3>
+
+        <div x-show="loading" class="viz-loading" x-transition.opacity.duration.200ms>
+            Loading results...
+        </div>
+
+        <div x-show="!loading && results.length === 0" class="viz-no-results" x-transition.opacity.duration.200ms>
+            No results found. Try a different query or adjust your search parameters.
+        </div>
+
+        <template x-if="!loading && results.length > 0">
+            <div x-transition.opacity.duration.200ms>
+                <template x-for="result in results" :key="`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`">
+                    <div style="padding: 12px; border-bottom: 1px solid #eee;">
+                        <!-- Opens the source document in a new tab; rel="noopener noreferrer" denies that tab a window.opener handle to this page -->
+                        <a :href="getNextcloudUrl(result)" target="_blank" rel="noopener noreferrer" style="font-weight: 500; color: #0066cc; text-decoration: none;">
+                            <span x-text="result.title"></span>
+                        </a>
+                        <!-- Excerpt preview truncated to 200 characters; a missing excerpt falls back to an empty
+                             string so the length check cannot throw and abort rendering of the result row. -->
+                        <div style="font-size: 14px; color: #666; margin-top: 4px;"
+                             x-text="(result.excerpt || '').length > 200 ? result.excerpt.substring(0, 200) + '...' : (result.excerpt || '')"></div>
+                        <div style="font-size: 12px; color: #999; margin-top: 4px;">
+                            Raw Score: <span x-text="result.original_score.toFixed(3)"></span>
+                            (<span x-text="(result.score * 100).toFixed(0)"></span>% relative) |
+                            Type: <span x-text="result.doc_type"></span>
+                        </div>
+
+                        <!-- Show Chunk button (only if chunk position is available) -->
+                        <template x-if="hasChunkPosition(result)">
+                            <button
+                                class="chunk-toggle-btn"
+                                @click="toggleChunk(result)"
+                                x-text="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`) ? 'Hide Chunk' : 'Show Chunk'"
+                            ></button>
+                        </template>
+
+                        <!-- Chunk context (expanded inline) -->
+                        <template x-if="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`)">
+                            <div class="chunk-context" x-transition.opacity.duration.200ms>
+                                <template x-if="chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
+                                    <div style="color: #666; font-style: italic;">Loading chunk...</div>
+                                </template>
+                                <template x-if="!chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
+                                    <div>
+                                        <!-- Highlighted page image for PDFs -->
+                                        <template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image">
+                                            <div class="chunk-image-container">
+                                                <div class="chunk-image-header">
+                                                    <span>Page <span x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"></span></span>
+                                                </div>
+                                                <img
+                                                    :src="'data:image/png;base64,' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image"
+                                                    :alt="'Page ' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"
+                                                    class="chunk-highlighted-image"
+                                                />
+                                            </div>
+                                        </template>
+                                        <!-- Text context -->
+                                        <template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_before">
+                                            <span class="chunk-ellipsis">...</span>
+                                        </template>
+                                        <span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.before_context"></span><span class="chunk-matched" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.chunk_text"></span><span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.after_context"></span><template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_after">
+                                            <span class="chunk-ellipsis">...</span>
+                                        </template>
+                                    </div>
+                                </template>
+                            </div>
+                        </template>
+                    </div>
+                </template>
+            </div>
+        </template>
+        </div><!-- Search Results -->
+    </div><!-- .viz-layout -->
+</div><!-- x-data="vizApp()" -->
@@ -0,0 +1,392 @@
+{% extends "base.html" %}
+
+{% block title %}Welcome - Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+    <!-- Alpine.js for interactive elements.
+         NOTE(review): "3.x.x" is a floating version range, so the CDN may start serving a newer 3.x
+         build without any change here, and the tag carries no integrity attribute. Consider pinning
+         an exact version with an SRI hash, or serving the file from /app/static. TODO confirm. -->
+    <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
+{% endblock %}
+
+{% block extra_styles %}
+    /* Welcome page specific styles */
+    .hero-section {
+        background: linear-gradient(135deg, var(--color-primary-element) 0%, #0082c9 100%);
+        color: white;
+        padding: 60px 24px;
+        margin: -24px -24px 40px -24px;
+        border-radius: 0 0 var(--border-radius-large) var(--border-radius-large);
+        text-align: center;
+    }
+
+    .hero-section h1 {
+        color: white;
+        font-size: 36px;
+        margin: 0 0 16px 0;
+        font-weight: 600;
+    }
+
+    .hero-section p {
+        font-size: 18px;
+        opacity: 0.95;
+        max-width: 700px;
+        margin: 0 auto;
+        line-height: 1.6;
+    }
+
+    .feature-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+        gap: 24px;
+        margin: 32px 0;
+    }
+
+    .feature-card {
+        background: var(--color-main-background);
+        border: 2px solid var(--color-border);
+        border-radius: var(--border-radius-large);
+        padding: 24px;
+        transition: all 0.2s;
+        cursor: pointer;
+        text-decoration: none;
+        color: inherit;
+        display: block;
+    }
+
+    .feature-card:hover {
+        border-color: var(--color-primary-element);
+        box-shadow: 0 4px 12px rgba(0, 103, 158, 0.15);
+        transform: translateY(-2px);
+    }
+
+    .feature-card h3 {
+        color: var(--color-primary-element);
+        font-size: 20px;
+        margin: 12px 0 8px 0;
+        font-weight: 600;
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .feature-card p {
+        color: var(--color-text-maxcontrast);
+        font-size: 14px;
+        line-height: 1.6;
+        margin: 8px 0 0 0;
+    }
+
+    .feature-icon {
+        width: 48px;
+        height: 48px;
+        background: var(--color-primary-element-light);
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        margin-bottom: 8px;
+    }
+
+    .feature-icon svg {
+        width: 28px;
+        height: 28px;
+        fill: var(--color-primary-element);
+    }
+
+    .info-section {
+        background: var(--color-background-hover);
+        border-radius: var(--border-radius-large);
+        padding: 32px;
+        margin: 32px 0;
+    }
+
+    .info-section h2 {
+        color: var(--color-main-text);
+        font-size: 24px;
+        margin: 0 0 16px 0;
+        border: none;
+        padding: 0;
+    }
+
+    .info-section p {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 12px 0;
+    }
+
+    .info-section ul {
+        margin: 12px 0;
+        padding-left: 24px;
+    }
+
+    .info-section li {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 8px 0;
+    }
+
+    .info-section code {
+        background: var(--color-main-background);
+        padding: 2px 8px;
+        border-radius: var(--border-radius);
+        font-size: 13px;
+    }
+
+    .auth-status {
+        background: var(--color-primary-element-light);
+        border-left: 4px solid var(--color-primary-element);
+        padding: 16px 20px;
+        margin: 24px 0;
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .auth-status svg {
+        width: 24px;
+        height: 24px;
+        fill: var(--color-primary-element);
+        flex-shrink: 0;
+    }
+
+    .auth-status-text {
+        flex: 1;
+    }
+
+    .auth-status-text strong {
+        display: block;
+        color: var(--color-main-text);
+        font-size: 14px;
+        margin-bottom: 4px;
+    }
+
+    .auth-status-text span {
+        color: var(--color-text-maxcontrast);
+        font-size: 13px;
+    }
+{% endblock %}
+
+{% block content %}
+<div class="app-content-wrapper">
+    <!-- Main Content Area -->
+    <main id="app-content">
+        <div class="page-content">
+            <!-- Hero Section -->
+            <div class="hero-section">
+                <h1>Welcome to Nextcloud MCP Server</h1>
+                <p>
+                    Interactive user interface for semantic search and document retrieval.
+                    Test queries, visualize results, and explore your Nextcloud content using RAG workflows.
+                </p>
+            </div>
+
+            <!-- Authentication Status -->
+            <div class="auth-status">
+                <svg viewBox="0 0 24 24">
+                    <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                </svg>
+                <div class="auth-status-text">
+                    <strong>Authenticated as: {{ username }}</strong>
+                    <span>Authentication mode: <code>{{ auth_mode }}</code></span>
+                </div>
+            </div>
+
+            {% if vector_sync_enabled %}
+            <!-- Vector Sync Enabled Content -->
+            <div class="info-section">
+                <h2>About Semantic Search</h2>
+                <p>
+                    This interface provides access to <strong>semantic search</strong> capabilities powered by vector embeddings.
+                    Unlike traditional keyword search, semantic search understands the <em>meaning</em> of your queries and finds
+                    conceptually similar content across your Nextcloud apps.
+                </p>
+                <p>
+                    <strong>How it works:</strong>
+                </p>
+                <ul>
+                    <li>Documents from Notes, Calendar, Files, Contacts, and Deck are indexed into a vector database</li>
+                    <li>Each document chunk is converted to a 768-dimensional vector embedding that captures semantic meaning</li>
+                    <li>Queries are also converted to embeddings and matched against document vectors using similarity search</li>
+                    <li>Results can be retrieved using pure semantic search or hybrid BM25 search combining keywords and semantics</li>
+                </ul>
+            </div>
+
+            <div class="info-section">
+                <h2>RAG Workflow Integration</h2>
+                <p>
+                    This UI allows you to <strong>test the same queries that Large Language Models (LLMs) would use</strong> in a
+                    Retrieval-Augmented Generation (RAG) workflow. When an AI assistant needs to answer questions about your data:
+                </p>
+                <ul>
+                    <li><strong>Step 1:</strong> The assistant converts your question into a search query</li>
+                    <li><strong>Step 2:</strong> The MCP server retrieves relevant document chunks using semantic search</li>
+                    <li><strong>Step 3:</strong> Retrieved context is passed to the LLM to generate an informed answer</li>
+                </ul>
+
+                <!-- RAG Workflow Diagram -->
+                <div style="background: var(--color-main-background); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 24px; margin: 24px 0; font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace; font-size: 13px; line-height: 1.8; overflow-x: auto;">
+                    <div style="text-align: center; font-weight: 600; margin-bottom: 16px; color: var(--color-primary-element); font-size: 14px;">
+                        MCP Sampling RAG Workflow
+                    </div>
+                    <pre style="margin: 0; color: var(--color-main-text);">
+┌─────────────────┐
+│   <strong>MCP Client</strong>   │  User asks: "What are health benefits of coffee?"
+│  (Claude Code)  │
+└────────┬────────┘
+         │ (1) User question
+         ↓
+┌────────────────────────────────────────────────────────────────────────┐
+│                      <strong>Nextcloud MCP Server</strong>                          │
+│  ┌──────────────────────────────────────────────────────────────────┐  │
+│  │ <strong>nc_semantic_search_answer</strong> Tool (MCP Sampling-enabled)      │  │
+│  │                                                                  │  │
+│  │  (2) Semantic Search                                             │  │
+│  │  ┌────────────────────────────────────────────────────────┐     │  │
+│  │  │ Query: "health benefits of coffee"                     │     │  │
+│  │  │ → Convert to 768D vector embedding                     │     │  │
+│  │  │ → Search Qdrant (BM25 Hybrid + RRF fusion)             │     │  │
+│  │  │ → Retrieve top 5 relevant document chunks              │     │  │
+│  │  └────────────────────────────────────────────────────────┘     │  │
+│  │                                                                  │  │
+│  │  (3) Construct Prompt with Context                               │  │
+│  │  ┌────────────────────────────────────────────────────────┐     │  │
+│  │  │ "What are health benefits of coffee?                   │     │  │
+│  │  │                                                         │     │  │
+│  │  │  Documents:                                             │     │  │
+│  │  │  - [MED-2155] Effects of habitual coffee consumption...│     │  │
+│  │  │  - [MED-1646] Beverage consumption guidance...         │     │  │
+│  │  │  - [MED-1627] Coffee and depression risk...            │     │  │
+│  │  │  ...                                                    │     │  │
+│  │  │                                                         │     │  │
+│  │  │  Provide answer with citations."                        │     │  │
+│  │  └────────────────────────────────────────────────────────┘     │  │
+│  │                                                                  │  │
+│  │  (4) MCP Sampling Request                                        │  │
+│  │  ─────────────────────────────────────────────────────────────> │  │
+│  └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+         │
+         │ Sampling request with prompt + context
+         ↓
+┌─────────────────┐
+│   <strong>MCP Client</strong>   │  (5) Client's LLM generates answer using retrieved context
+│    (Claude)     │      → "Coffee consumption (2-3 cups/day) is associated with
+└────────┬────────┘         reduced risk of type 2 diabetes, cardiovascular disease,
+         │                  and improved liver health (Document 1, 2)..."
+         │
+         │ (6) Answer with citations
+         ↓
+┌─────────────────┐
+│      User       │  Receives comprehensive answer with source citations
+└─────────────────┘</pre>
+                </div>
+
+                <p style="margin-top: 16px;">
+                    <strong>Key Point:</strong> The MCP server retrieves context but doesn't generate answers itself.
+                    Through <strong>MCP sampling</strong>, it requests the client's LLM to generate responses, giving users
+                    full control over which model is used and ensuring that answer generation happens client-side.
+                </p>
+
+                <p>
+                    By using this interface, you can preview search results, understand relevance scores, and verify
+                    that the system retrieves the right information before it reaches the LLM.
+                </p>
+            </div>
+
+            <!-- Feature Cards -->
+            <h2>Available Features</h2>
+            <div class="feature-grid">
+                <a href="/app/user-info" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                        </svg>
+                    </div>
+                    <h3>User Information</h3>
+                    <p>
+                        View your authentication details, session information, and IdP profile.
+                        Manage background access permissions.
+                    </p>
+                </a>
+
+                <a href="/app/user-info#vector-sync" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                        </svg>
+                    </div>
+                    <h3>Vector Sync Status</h3>
+                    <p>
+                        Monitor real-time indexing progress with metrics for indexed documents, pending queue,
+                        and synchronization status.
+                    </p>
+                </a>
+
+                <a href="/app/user-info#vector-viz" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                        </svg>
+                    </div>
+                    <h3>Vector Visualization</h3>
+                    <p>
+                        Interactive search interface with 2D PCA visualization. Compare algorithms,
+                        view relevance scores, and explore matched document chunks.
+                    </p>
+                </a>
+            </div>
+
+            {% else %}
+            <!-- Vector Sync Disabled Content -->
+            <div class="warning">
+                <h3 style="margin-top: 0;">Vector Sync is Disabled</h3>
+                <p>
+                    Semantic search and vector visualization features are currently disabled.
+                    To enable these features, set <code>VECTOR_SYNC_ENABLED=true</code> in your environment configuration.
+                </p>
+                <p style="margin-bottom: 0;">
+                    <strong>Learn more:</strong>
+                    <a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank" style="color: inherit; text-decoration: underline;">
+                        Configuration Guide
+                    </a>
+                </p>
+            </div>
+
+            <!-- Limited Feature Card -->
+            <h2>Available Features</h2>
+            <div class="feature-grid">
+                <a href="/app/user-info" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                        </svg>
+                    </div>
+                    <h3>User Information</h3>
+                    <p>
+                        View your authentication details, session information, and IdP profile.
+                        Manage background access permissions.
+                    </p>
+                </a>
+            </div>
+            {% endif %}
+
+            <!-- Documentation Section -->
+            <div class="info-section" style="margin-top: 40px;">
+                <h2>Documentation</h2>
+                <p>
+                    For detailed information about configuration, authentication modes, and advanced features,
+                    please refer to the project documentation:
+                </p>
+                <ul>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/installation.md" target="_blank">Installation Guide</a></li>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank">Configuration Options</a></li>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/authentication.md" target="_blank">Authentication Modes</a></li>
+                    {% if vector_sync_enabled %}
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/user-guide/vector-sync-ui.md" target="_blank">Vector Sync UI Guide</a></li>
+                    {% endif %}
+                </ul>
+            </div>
+        </div>
+    </main>
+</div>
+{% endblock %}
@@ -14,16 +14,16 @@ The Token Broker provides:
 - Session vs background token separation (RFC 8693)
 """

-import asyncio
 import logging
 from datetime import datetime, timedelta, timezone
 from typing import Dict, Optional, Tuple

+import anyio
 import httpx
 import jwt
 from cryptography.fernet import Fernet

-from nextcloud_mcp_server.auth.refresh_token_storage import RefreshTokenStorage
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
 from nextcloud_mcp_server.auth.token_exchange import exchange_token_for_delegation

 logger = logging.getLogger(__name__)
@@ -43,7 +43,7 @@ class TokenCache:
        self._cache: Dict[str, Tuple[str, datetime]] = {}
        self._ttl = timedelta(seconds=ttl_seconds)
        self._early_refresh = timedelta(seconds=early_refresh_seconds)
-        self._lock = asyncio.Lock()
+        self._lock = anyio.Lock()

    async def get(self, user_id: str) -> Optional[str]:
        """Get cached token if valid."""
@@ -20,7 +20,7 @@ import httpx
 import jwt

 from ..config import get_settings
-from .refresh_token_storage import RefreshTokenStorage
+from .storage import RefreshTokenStorage

 logger = logging.getLogger(__name__)

@@ -26,6 +26,10 @@ from jwt import PyJWKClient
 from mcp.server.auth.provider import AccessToken, TokenVerifier

 from nextcloud_mcp_server.config import Settings
+from nextcloud_mcp_server.observability.metrics import (
+    oauth_token_cache_hits_total,
+    record_oauth_token_validation,
+)

 logger = logging.getLogger(__name__)

@@ -105,8 +109,11 @@ class UnifiedTokenVerifier(TokenVerifier):
        cached = self._get_cached_token(token)
        if cached:
            logger.debug("Token found in cache")
+            oauth_token_cache_hits_total.labels(hit="true").inc()
            return cached

+        oauth_token_cache_hits_total.labels(hit="false").inc()
+
        # Both modes do the same validation (MCP audience only)
        return await self._verify_mcp_audience(token)

@@ -124,13 +131,24 @@ class UnifiedTokenVerifier(TokenVerifier):
        Returns:
            AccessToken if valid with MCP audience, None otherwise
        """
+        validation_method = "unknown"
        try:
            # Attempt JWT verification first
            if self._is_jwt_format(token) and self.jwks_client:
+                validation_method = "jwt"
                payload = await self._verify_jwt_signature(token)
+                if payload:
+                    record_oauth_token_validation("jwt", "valid")
+                else:
+                    record_oauth_token_validation("jwt", "invalid")
            else:
                # Fall back to introspection for opaque tokens
+                validation_method = "introspect"
                payload = await self._introspect_token(token)
+                if payload:
+                    record_oauth_token_validation("introspect", "valid")
+                else:
+                    record_oauth_token_validation("introspect", "invalid")
                if not payload:
                    return None

@@ -146,6 +164,8 @@ class UnifiedTokenVerifier(TokenVerifier):
                    f"Got {audiences}, need MCP ({self.settings.oidc_client_id} or "
                    f"{self.settings.nextcloud_mcp_server_url})"
                )
+                # Record as invalid due to audience mismatch
+                record_oauth_token_validation(validation_method, "invalid")
                return None

            # Log based on mode for clarity
@@ -163,6 +183,7 @@ class UnifiedTokenVerifier(TokenVerifier):

        except Exception as e:
            logger.error(f"Token verification failed: {e}")
+            record_oauth_token_validation(validation_method, "error")
            return None

    def _has_mcp_audience(self, payload: dict[str, Any]) -> bool:
@@ -231,17 +252,21 @@ class UnifiedTokenVerifier(TokenVerifier):
                token,
                signing_key.key,
                algorithms=["RS256"],
-                issuer=self.settings.oidc_issuer
-                if hasattr(self.settings, "oidc_issuer")
-                else None,
+                issuer=(
+                    self.settings.oidc_issuer
+                    if hasattr(self.settings, "oidc_issuer")
+                    else None
+                ),
                options={
                    "verify_signature": True,
                    "verify_exp": True,
                    "verify_iat": True,
-                    "verify_iss": True
-                    if hasattr(self.settings, "oidc_issuer")
-                    and self.settings.oidc_issuer
-                    else False,
+                    "verify_iss": (
+                        True
+                        if hasattr(self.settings, "oidc_issuer")
+                        and self.settings.oidc_issuer
+                        else False
+                    ),
                    "verify_aud": False,  # We handle audience validation separately
                },
            )
@@ -278,10 +303,13 @@ class UnifiedTokenVerifier(TokenVerifier):

        try:
            # Introspection requires client authentication
+            client_id = self.settings.oidc_client_id
+            client_secret = self.settings.oidc_client_secret
+            assert client_id is not None and client_secret is not None
            response = await self.http_client.post(
                self.introspection_uri,
                data={"token": token},
-                auth=(self.settings.oidc_client_id, self.settings.oidc_client_secret),
+                auth=(client_id, client_secret),
            )

            if response.status_code == 200:
@@ -9,15 +9,83 @@ For OAuth mode: Requires browser-based OAuth login to establish session.

 import logging
 import os
+from pathlib import Path
 from typing import Any

 import httpx
+from jinja2 import Environment, FileSystemLoader
 from starlette.authentication import requires
 from starlette.requests import Request
 from starlette.responses import HTMLResponse, JSONResponse

+from nextcloud_mcp_server.client import NextcloudClient
+
 logger = logging.getLogger(__name__)

+# Setup Jinja2 environment for templates
+# NOTE(review): Environment() is created without `autoescape`, so Jinja2's
+# default (no HTML escaping) applies. Values such as error messages are passed
+# to these templates - confirm the templates escape them (e.g. `|e`), or enable
+# autoescaping once the pre-rendered HTML fragments are marked safe.
+_template_dir = Path(__file__).parent / "templates"
+_jinja_env = Environment(loader=FileSystemLoader(_template_dir))
+
+
+async def _get_authenticated_client_for_userinfo(request: Request) -> NextcloudClient:
+    """Build a Nextcloud client authenticated for the user info page.
+
+    Shared by the authenticated routes that need to call Nextcloud APIs.
+    When the application state carries no ``oauth_context`` the BasicAuth
+    credentials from the ``NEXTCLOUD_*`` environment variables are used;
+    otherwise the access token stored for the ``mcp_session`` cookie is used.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        Authenticated NextcloudClient
+
+    Raises:
+        RuntimeError: If the credentials, the session, the stored access
+            token, or the Nextcloud host/username cannot be resolved
+    """
+    oauth_ctx = getattr(request.app.state, "oauth_context", None)
+
+    if oauth_ctx:
+        # OAuth mode - the session cookie keys the stored token record
+        token_store = oauth_ctx.get("storage")
+        session_cookie = request.cookies.get("mcp_session")
+        if not (token_store and session_cookie):
+            raise RuntimeError("Session not found")
+
+        record = await token_store.get_refresh_token(session_cookie)
+        if not record or "access_token" not in record:
+            raise RuntimeError("No access token found in session")
+
+        oauth_user = record.get("username")
+        oauth_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+        if not (oauth_host and oauth_user):
+            raise RuntimeError("Nextcloud host or username not configured")
+
+        return NextcloudClient.from_token(
+            base_url=oauth_host, token=record["access_token"], username=oauth_user
+        )
+
+    # BasicAuth mode - use credentials from environment
+    env_host = os.getenv("NEXTCLOUD_HOST")
+    env_user = os.getenv("NEXTCLOUD_USERNAME")
+    env_password = os.getenv("NEXTCLOUD_PASSWORD")
+    # Explicit falsy checks reject missing and empty values alike
+    if not env_host or not env_user or not env_password:
+        raise RuntimeError("BasicAuth credentials not configured")
+
+    return NextcloudClient(
+        base_url=env_host,
+        username=env_user,
+        auth=httpx.BasicAuth(env_user, env_password),
+    )
+

 async def _get_processing_status(request: Request) -> dict[str, Any] | None:
    """Get vector sync processing status.
@@ -88,6 +156,71 @@ async def _get_processing_status(request: Request) -> dict[str, Any] | None:
        return None


+@requires("authenticated", redirect="oauth_login")
+async def vector_sync_status_fragment(request: Request) -> HTMLResponse:
+    """Vector sync status fragment endpoint - returns HTML fragment with current status.
+
+    This endpoint is polled by htmx to provide real-time updates of vector sync processing
+    status without requiring a full page refresh.
+
+    The polling container (``<div id="vector-sync-status" ... hx-swap="innerHTML">``)
+    is rendered once by the user info page, so every response from this endpoint -
+    including the "not available" fallback - is inner content only. Returning
+    another container here would nest a second element with the same id and its
+    own polling trigger inside the first one.
+
+    Requires authentication via session cookie (redirects to oauth_login route if not authenticated).
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML response with the vector sync status table, or a short
+        "not available" message when no processing status can be obtained
+    """
+    processing_status = await _get_processing_status(request)
+
+    # If vector sync is disabled or unavailable, return only the message; the
+    # existing container keeps polling and will pick up the table once
+    # a status becomes available again
+    if not processing_status:
+        return HTMLResponse('<p style="color: #999;">Vector sync not available</p>')
+
+    indexed_count = processing_status["indexed_count"]
+    pending_count = processing_status["pending_count"]
+    status = processing_status["status"]
+
+    # Format numbers with commas for readability
+    indexed_count_str = f"{indexed_count:,}"
+    pending_count_str = f"{pending_count:,}"
+
+    # Status badge color and text (anything other than "syncing" is shown as idle)
+    if status == "syncing":
+        status_badge = (
+            '<span style="color: #ff9800; font-weight: bold;">⟳ Syncing</span>'
+        )
+    else:
+        status_badge = '<span style="color: #4caf50; font-weight: bold;">✓ Idle</span>'
+
+    # Return inner content only (container div is in initial page render)
+    html = f"""
+    <h2>Vector Sync Status</h2>
+    <table>
+        <tr>
+            <td><strong>Indexed Documents</strong></td>
+            <td>{indexed_count_str}</td>
+        </tr>
+        <tr>
+            <td><strong>Pending Documents</strong></td>
+            <td>{pending_count_str}</td>
+        </tr>
+        <tr>
+            <td><strong>Status</strong></td>
+            <td>{status_badge}</td>
+        </tr>
+    </table>
+    """
+
+    return HTMLResponse(html)
+
+
 async def _get_userinfo_endpoint(oauth_ctx: dict[str, Any]) -> str | None:
    """Get the correct userinfo endpoint based on OAuth mode.

@@ -296,57 +429,33 @@ async def user_info_html(request: Request) -> HTMLResponse:
    # Get vector sync processing status
    processing_status = await _get_processing_status(request)

+    # Check if user is admin (for Webhooks tab)
+    is_admin = False
+    try:
+        from nextcloud_mcp_server.auth.permissions import is_nextcloud_admin
+
+        # Get authenticated Nextcloud client
+        nc_client = await _get_authenticated_client_for_userinfo(request)
+        is_admin = await is_nextcloud_admin(request, nc_client._client)
+        await nc_client.close()
+    except Exception as e:
+        logger.warning(f"Failed to check admin status: {e}")
+        # Default to not admin if check fails
+
    # Check for error
    if "error" in user_context and user_context["error"] != "":
        # Get login URL dynamically
        oauth_ctx = getattr(request.app.state, "oauth_context", None)
        login_url = str(request.url_for("oauth_login")) if oauth_ctx else "/oauth/login"

-        error_html = f"""
-        <!DOCTYPE html>
-        <html lang="en">
-        <head>
-            <meta charset="UTF-8">
-            <meta name="viewport" content="width=device-width, initial-scale=1.0">
-            <title>Error - Nextcloud MCP Server</title>
-            <style>
-                body {{
-                    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
-                    max-width: 800px;
-                    margin: 50px auto;
-                    padding: 20px;
-                    background-color: #f5f5f5;
-                }}
-                .container {{
-                    background: white;
-                    border-radius: 8px;
-                    padding: 30px;
-                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-                }}
-                h1 {{
-                    color: #d32f2f;
-                    margin-top: 0;
-                }}
-                .error {{
-                    background-color: #ffebee;
-                    border-left: 4px solid #d32f2f;
-                    padding: 15px;
-                    margin: 20px 0;
-                }}
-            </style>
-        </head>
-        <body>
-            <div class="container">
-                <h1>Error Retrieving User Info</h1>
-                <div class="error">
-                    <strong>Error:</strong> {user_context["error"]}
-                </div>
-                <p><a href="{login_url}">Login again</a></p>
-            </div>
-        </body>
-        </html>
-        """
-        return HTMLResponse(content=error_html)
+        template = _jinja_env.get_template("error.html")
+        return HTMLResponse(
+            content=template.render(
+                error_title="Error Retrieving User Info",
+                error_message=user_context["error"],
+                login_url=login_url,
+            )
+        )

    # Build HTML response
    auth_mode = user_context.get("auth_mode", "unknown")
@@ -360,6 +469,16 @@ async def user_info_html(request: Request) -> HTMLResponse:
            str(request.url_for("oauth_logout")) if oauth_ctx else "/oauth/logout"
        )

+    # Get Nextcloud host for generating links to apps (used by viz tab)
+    # Use public issuer URL if available (for browser-accessible links),
+    # otherwise fall back to NEXTCLOUD_HOST from settings
+    from nextcloud_mcp_server.config import get_settings
+
+    settings = get_settings()
+    nextcloud_host_for_links = (
+        os.getenv("NEXTCLOUD_PUBLIC_ISSUER_URL") or settings.nextcloud_host
+    )
+
    # Build host info HTML (BasicAuth only)
    host_info_html = ""
    if auth_mode == "basic":
@@ -443,43 +562,15 @@ async def user_info_html(request: Request) -> HTMLResponse:
            </div>
            """

-    # Build vector sync status HTML
+    # Build vector sync status HTML (with htmx auto-refresh)
    vector_status_html = ""
    if processing_status:
-        indexed_count = processing_status["indexed_count"]
-        pending_count = processing_status["pending_count"]
-        status = processing_status["status"]
-
-        # Format numbers with commas for readability
-        indexed_count_str = f"{indexed_count:,}"
-        pending_count_str = f"{pending_count:,}"
-
-        # Status badge color and text
-        if status == "syncing":
-            status_badge = (
-                '<span style="color: #ff9800; font-weight: bold;">⟳ Syncing</span>'
-            )
-        else:
-            status_badge = (
-                '<span style="color: #4caf50; font-weight: bold;">✓ Idle</span>'
-            )
-
-        vector_status_html = f"""
-        <h2>Vector Sync Status</h2>
-        <table>
-            <tr>
-                <td><strong>Indexed Documents</strong></td>
-                <td>{indexed_count_str}</td>
-            </tr>
-            <tr>
-                <td><strong>Pending Documents</strong></td>
-                <td>{pending_count_str}</td>
-            </tr>
-            <tr>
-                <td><strong>Status</strong></td>
-                <td>{status_badge}</td>
-            </tr>
-        </table>
+        # Use htmx to load and auto-refresh the status fragment
+        # Container div stays stable, only inner content updates every 10s
+        vector_status_html = """
+            <div id="vector-sync-status" hx-get="/app/vector-sync/status" hx-trigger="load, every 10s" hx-swap="innerHTML">
+                <p style="color: #999;">Loading vector sync status...</p>
+            </div>
        """

    # Build IdP profile HTML
@@ -506,128 +597,63 @@ async def user_info_html(request: Request) -> HTMLResponse:
        <div class="warning">{user_context["idp_profile_error"]}</div>
        """

-    html_content = f"""
-    <!DOCTYPE html>
-    <html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <title>User Info - Nextcloud MCP Server</title>
-        <style>
-            body {{
-                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
-                max-width: 800px;
-                margin: 50px auto;
-                padding: 20px;
-                background-color: #f5f5f5;
-            }}
-            .container {{
-                background: white;
-                border-radius: 8px;
-                padding: 30px;
-                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-            }}
-            h1 {{
-                color: #0082c9;
-                margin-top: 0;
-                border-bottom: 2px solid #0082c9;
-                padding-bottom: 10px;
-            }}
-            h2 {{
-                color: #333;
-                margin-top: 30px;
-                border-bottom: 1px solid #e0e0e0;
-                padding-bottom: 5px;
-            }}
-            table {{
-                width: 100%;
-                border-collapse: collapse;
-                margin: 15px 0;
-            }}
-            td {{
-                padding: 10px;
-                border-bottom: 1px solid #e0e0e0;
-            }}
-            td:first-child {{
-                width: 200px;
-                color: #666;
-            }}
-            code {{
-                background-color: #f5f5f5;
-                padding: 2px 6px;
-                border-radius: 3px;
-                font-family: 'Courier New', monospace;
-            }}
-            .badge {{
-                display: inline-block;
-                padding: 3px 8px;
-                border-radius: 12px;
-                font-size: 12px;
-                font-weight: bold;
-                text-transform: uppercase;
-            }}
-            .badge-oauth {{
-                background-color: #4caf50;
-                color: white;
-            }}
-            .badge-basic {{
-                background-color: #2196f3;
-                color: white;
-            }}
-            .warning {{
-                background-color: #fff3cd;
-                border-left: 4px solid #ffc107;
-                padding: 15px;
-                margin: 15px 0;
-                color: #856404;
-            }}
-            .logout {{
-                margin-top: 30px;
-                padding-top: 20px;
-                border-top: 1px solid #e0e0e0;
-            }}
-            .button {{
-                display: inline-block;
-                padding: 10px 20px;
-                background-color: #d32f2f;
-                color: white;
-                text-decoration: none;
-                border-radius: 4px;
-                transition: background-color 0.3s;
-            }}
-            .button:hover {{
-                background-color: #b71c1c;
-            }}
-        </style>
-    </head>
-    <body>
-        <div class="container">
-            <h1>Nextcloud MCP Server - User Info</h1>
+    # Build user info tab content
+    user_info_tab_html = f"""
+        <h2>Authentication</h2>
+        <table>
+            <tr>
+                <td><strong>Username</strong></td>
+                <td>{username}</td>
+            </tr>
+            <tr>
+                <td><strong>Authentication Mode</strong></td>
+                <td><span class="badge badge-{auth_mode}">{auth_mode}</span></td>
+            </tr>
+        </table>

-            <h2>Authentication</h2>
-            <table>
-                <tr>
-                    <td><strong>Username</strong></td>
-                    <td>{username}</td>
-                </tr>
-                <tr>
-                    <td><strong>Authentication Mode</strong></td>
-                    <td><span class="badge badge-{auth_mode}">{auth_mode}</span></td>
-                </tr>
-            </table>
-
-            {host_info_html}
-            {session_info_html}
-            {vector_status_html}
-            {idp_profile_html}
-
-            {f'<div class="logout"><a href="{logout_url}" class="button">Logout</a></div>' if auth_mode == "oauth" else ""}
-        </div>
-    </body>
-    </html>
+        {host_info_html}
+        {session_info_html}
+        {idp_profile_html}
    """

-    return HTMLResponse(content=html_content)
+    # Determine which tabs to show
+    show_vector_sync_tab = processing_status is not None
+    show_webhooks_tab = is_admin
+
+    # Build vector sync tab content (only if enabled)
+    vector_sync_tab_html = ""
+    if show_vector_sync_tab:
+        vector_sync_tab_html = vector_status_html
+
+    # Build webhooks tab content (only if admin)
+    webhooks_tab_html = ""
+    if show_webhooks_tab:
+        webhooks_tab_html = """
+            <div hx-get="/app/webhooks" hx-trigger="load" hx-swap="outerHTML">
+                <p style="color: #999;">Loading webhook management...</p>
+            </div>
+        """
+
+    # Check if vector sync is enabled (needed for Welcome tab)
+    vector_sync_enabled = os.getenv("VECTOR_SYNC_ENABLED", "false").lower() == "true"
+
+    # Render template
+    template = _jinja_env.get_template("user_info.html")
+    return HTMLResponse(
+        content=template.render(
+            user_info_tab_html=user_info_tab_html,
+            vector_sync_tab_html=vector_sync_tab_html,
+            webhooks_tab_html=webhooks_tab_html,
+            show_vector_sync_tab=show_vector_sync_tab,
+            show_webhooks_tab=show_webhooks_tab,
+            logout_url=logout_url if auth_mode == "oauth" else None,
+            nextcloud_host_for_links=nextcloud_host_for_links,
+            # Additional context for Welcome tab
+            vector_sync_enabled=vector_sync_enabled,
+            username=username,
+            auth_mode=auth_mode,
+        )
+    )


@requires("authenticated", redirect="oauth_login")
@@ -647,17 +673,12 @@ async def revoke_session(request: Request) -> HTMLResponse:
    oauth_ctx = getattr(request.app.state, "oauth_context", None)

    if not oauth_ctx:
+        template = _jinja_env.get_template("error.html")
        return HTMLResponse(
-            """
-            <!DOCTYPE html>
-            <html>
-            <head><title>Error</title></head>
-            <body>
-                <h1>Error</h1>
-                <p>OAuth mode not enabled</p>
-            </body>
-            </html>
-            """,
+            content=template.render(
+                error_title="Error",
+                error_message="OAuth mode not enabled",
+            ),
            status_code=400,
        )

@@ -665,17 +686,12 @@ async def revoke_session(request: Request) -> HTMLResponse:
    session_id = request.cookies.get("mcp_session")

    if not storage or not session_id:
+        template = _jinja_env.get_template("error.html")
        return HTMLResponse(
-            """
-            <!DOCTYPE html>
-            <html>
-            <head><title>Error</title></head>
-            <body>
-                <h1>Error</h1>
-                <p>Session not found</p>
-            </body>
-            </html>
-            """,
+            content=template.render(
+                error_title="Error",
+                error_message="Session not found",
+            ),
            status_code=400,
        )

@@ -688,57 +704,26 @@ async def revoke_session(request: Request) -> HTMLResponse:
        # Redirect back to user page
        user_page_url = str(request.url_for("user_info_html"))

+        template = _jinja_env.get_template("success.html")
        return HTMLResponse(
-            f"""
-            <!DOCTYPE html>
-            <html lang="en">
-            <head>
-                <meta charset="UTF-8">
-                <meta http-equiv="refresh" content="2;url={user_page_url}">
-                <title>Background Access Revoked</title>
-                <style>
-                    body {{
-                        font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-                        max-width: 600px;
-                        margin: 50px auto;
-                        padding: 20px;
-                        text-align: center;
-                    }}
-                    .success {{
-                        background-color: #e8f5e9;
-                        border: 2px solid #4caf50;
-                        padding: 30px;
-                        border-radius: 8px;
-                    }}
-                    h1 {{
-                        color: #4caf50;
-                    }}
-                </style>
-            </head>
-            <body>
-                <div class="success">
-                    <h1>✓ Background Access Revoked</h1>
-                    <p>Your refresh token has been deleted successfully.</p>
-                    <p>Browser session remains active.</p>
-                    <p>Redirecting back to user page...</p>
-                </div>
-            </body>
-            </html>
-            """
+            content=template.render(
+                success_title="✓ Background Access Revoked",
+                success_messages=[
+                    "Your refresh token has been deleted successfully.",
+                    "Browser session remains active.",
+                ],
+                redirect_url=user_page_url,
+                redirect_delay=2,
+            )
        )

    except Exception as e:
        logger.error(f"Failed to revoke background access: {e}")
+        template = _jinja_env.get_template("error.html")
        return HTMLResponse(
-            f"""
-            <!DOCTYPE html>
-            <html>
-            <head><title>Error</title></head>
-            <body>
-                <h1>Error</h1>
-                <p>Failed to revoke background access: {e}</p>
-            </body>
-            </html>
-            """,
+            content=template.render(
+                error_title="Error",
+                error_message=f"Failed to revoke background access: {e}",
+            ),
            status_code=500,
        )
@@ -0,0 +1,669 @@
+"""Vector visualization routes for testing search algorithms.
+
+Provides a web UI for users to test different search algorithms on their own
+indexed documents and visualize results in 3D space using PCA.
+
+All processing happens server-side following ADR-012:
+- Search execution via shared search/algorithms.py
+- Query embedding generation
+- PCA dimensionality reduction (768-dim → 3D)
+- Only 3D coordinates + metadata sent to client
+- Bandwidth-efficient (3 floats per doc vs 768)
+"""
+
+import logging
+import time
+from pathlib import Path
+
+import numpy as np
+from jinja2 import Environment, FileSystemLoader
+from starlette.authentication import requires
+from starlette.requests import Request
+from starlette.responses import HTMLResponse, JSONResponse
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.observability.tracing import trace_operation
+from nextcloud_mcp_server.search import (
+    BM25HybridSearchAlgorithm,
+    SemanticSearchAlgorithm,
+)
+from nextcloud_mcp_server.vector.pca import PCA
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+# Setup Jinja2 environment for templates.
+# Autoescaping is switched on explicitly (Jinja2 leaves it off by default)
+# because the templates rendered from this module interpolate user-controlled
+# values, such as the display name, into HTML.
+_template_dir = Path(__file__).parent / "templates"
+_jinja_env = Environment(
+    loader=FileSystemLoader(_template_dir),
+    autoescape=True,
+)
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_html(request: Request) -> HTMLResponse:
+    """Serve the vector visualization page (search controls + plot).
+
+    When vector sync is disabled in the settings, a short HTML notice is
+    returned instead of the page.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML page with search interface, or the "not enabled" notice
+    """
+    if not get_settings().vector_sync_enabled:
+        return HTMLResponse(
+            """
+            <div>
+                <h2>Vector Visualization</h2>
+                <div style="padding: 20px; background: #fff3cd; border: 1px solid #ffc107; border-radius: 4px;">
+                    Vector sync is not enabled. Set VECTOR_SYNC_ENABLED=true to use this feature.
+                </div>
+            </div>
+            """
+        )
+
+    # Display name comes from the auth context; fall back to a fixed label
+    # when the user object does not expose one.
+    username = getattr(request.user, "display_name", "unknown")
+
+    page = _jinja_env.get_template("vector_viz.html").render(username=username)
+    return HTMLResponse(content=page)
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_search(request: Request) -> JSONResponse:
+    """Execute server-side search and return 3D coordinates + results.
+
+    All processing happens server-side:
+    1. Execute search via shared algorithm module
+    2. Reuse (or generate) the query embedding
+    3. Fetch matching vectors from Qdrant
+    4. Apply PCA reduction (embedding-dim → 3D) to query + documents
+    5. Return coordinates + metadata only
+
+    Query parameters:
+        query: Search text (default: "")
+        algorithm: "semantic" or "bm25_hybrid" (default: "bm25_hybrid")
+        limit: Maximum number of results, must be >= 1 (default: 50)
+        score_threshold: Score threshold handed to the algorithm (default: 0.0)
+        fusion: Fusion method for bm25_hybrid (default: "rrf")
+        doc_types: Comma-separated document types; empty means all types
+
+    Args:
+        request: Starlette request with query parameters
+
+    Returns:
+        JSON response with coordinates_3d and results (including query point).
+        Responds with 400 when vector sync is disabled, a numeric parameter is
+        malformed or the algorithm is unknown, 401 when no user name is
+        available, and 500 for unexpected failures.
+    """
+    settings = get_settings()
+
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"success": False, "error": "Vector sync not enabled"},
+            status_code=400,
+        )
+
+    # Get user info from auth context
+    username = (
+        request.user.display_name if hasattr(request.user, "display_name") else None
+    )
+
+    if not username:
+        return JSONResponse(
+            {"success": False, "error": "User not authenticated"},
+            status_code=401,
+        )
+
+    # Parse query parameters
+    query = request.query_params.get("query", "")
+    algorithm = request.query_params.get("algorithm", "bm25_hybrid")
+    fusion = request.query_params.get("fusion", "rrf")  # Default to RRF
+
+    # Numeric parameters come straight from the URL, so a malformed value is a
+    # client error (400) rather than an unhandled exception (500).
+    try:
+        limit = int(request.query_params.get("limit", "50"))
+        score_threshold = float(request.query_params.get("score_threshold", "0.0"))
+    except ValueError as e:
+        return JSONResponse(
+            {"success": False, "error": f"Invalid parameter format: {e}"},
+            status_code=400,
+        )
+
+    # A non-positive limit would turn the result slice below into a
+    # "drop from the end" slice (negative) or an always-empty search (zero).
+    if limit < 1:
+        return JSONResponse(
+            {"success": False, "error": "limit must be a positive integer"},
+            status_code=400,
+        )
+
+    # Parse doc_types (comma-separated list, None = all types).
+    # Whitespace and empty entries are dropped and duplicates removed (order
+    # preserved) so that "note, file" or "note,,note" do not trigger searches
+    # for bogus types or return the same hits twice.
+    doc_types_param = request.query_params.get("doc_types", "")
+    doc_types = (
+        list(
+            dict.fromkeys(
+                part.strip() for part in doc_types_param.split(",") if part.strip()
+            )
+        )
+        or None
+    )
+
+    logger.info(
+        f"Viz search: user={username}, query='{query}', "
+        f"algorithm={algorithm}, fusion={fusion}, limit={limit}, doc_types={doc_types}"
+    )
+
+    try:
+        # Start total request timer
+        request_start = time.perf_counter()
+        # Get authenticated HTTP client from session
+        # In BasicAuth mode: uses username/password from session
+        # In OAuth mode: uses access token from session
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            _get_authenticated_client_for_userinfo,
+        )
+
+        with trace_operation("vector_viz.get_auth_client"):
+            auth_client_ctx = await _get_authenticated_client_for_userinfo(request)
+
+        async with auth_client_ctx as nc_client:  # noqa: F841
+            # Create search algorithm (no client needed - verification removed)
+            if algorithm == "semantic":
+                search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+            elif algorithm == "bm25_hybrid":
+                search_algo = BM25HybridSearchAlgorithm(
+                    score_threshold=score_threshold, fusion=fusion
+                )
+            else:
+                return JSONResponse(
+                    {"success": False, "error": f"Unknown algorithm: {algorithm}"},
+                    status_code=400,
+                )
+
+            # Execute search (supports cross-app when doc_types=None)
+            # Get unverified results with buffer for filtering
+            search_start = time.perf_counter()
+            all_results = []
+            if not doc_types:
+                # Cross-app search - search all indexed types
+                with trace_operation(
+                    "vector_viz.search_execute",
+                    attributes={
+                        "search.algorithm": algorithm,
+                        "search.limit": limit * 2,
+                        "search.doc_type": "all",
+                    },
+                ):
+                    unverified_results = await search_algo.search(
+                        query=query,
+                        user_id=username,
+                        limit=limit * 2,  # Buffer for verification filtering
+                        doc_type=None,  # Search all types
+                        score_threshold=score_threshold,
+                    )
+                all_results.extend(unverified_results)
+            else:
+                # Search each document type and combine
+                for doc_type in doc_types:
+                    with trace_operation(
+                        "vector_viz.search_execute",
+                        attributes={
+                            "search.algorithm": algorithm,
+                            "search.limit": limit * 2,
+                            "search.doc_type": doc_type,
+                        },
+                    ):
+                        unverified_results = await search_algo.search(
+                            query=query,
+                            user_id=username,
+                            limit=limit * 2,  # Buffer for verification filtering
+                            doc_type=doc_type,
+                            score_threshold=score_threshold,
+                        )
+                    all_results.extend(unverified_results)
+                # Sort by score before verification
+                all_results.sort(key=lambda r: r.score, reverse=True)
+
+            # No verification needed for visualization - we only need Qdrant metadata
+            # (title, excerpt, doc_type) which is already in search results.
+            # Verification is only needed for sampling (LLM needs full content).
+            search_results = all_results[:limit]
+            search_duration = time.perf_counter() - search_start
+
+        # Store original scores and normalize for visualization
+        # (best result = 1.0, worst result = 0.0 within THIS result set)
+        # This makes visual encoding meaningful regardless of RRF normalization
+        with trace_operation(
+            "vector_viz.score_normalize",
+            attributes={"normalize.num_results": len(search_results)},
+        ):
+            if search_results:
+                scores = [r.score for r in search_results]
+                min_score, max_score = min(scores), max(scores)
+                # Fall back to 1.0 when all scores are equal to avoid dividing by zero
+                score_range = max_score - min_score if max_score > min_score else 1.0
+
+                logger.info(
+                    f"Normalizing scores for viz: original range [{min_score:.3f}, {max_score:.3f}] "
+                    f"→ [0.0, 1.0]"
+                )
+
+                # Store original score and rescale to 0-1 for visualization
+                for r in search_results:
+                    # Store original score before normalization
+                    r.original_score = r.score
+                    # Rescale for visual encoding
+                    r.score = (r.score - min_score) / score_range
+
+        if not search_results:
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [],
+                    "coordinates_3d": [],
+                    "query_coords": [],
+                    "message": "No results found",
+                }
+            )
+
+        # Fetch vectors for specific matching chunks from Qdrant using batch retrieve
+        vector_fetch_start = time.perf_counter()
+
+        with trace_operation("vector_viz.get_qdrant_client"):
+            qdrant_client = await get_qdrant_client()
+
+        chunk_vectors_map = {}  # Map (doc_id, chunk_start, chunk_end) -> vector
+
+        # Collect point IDs from search results for batch retrieval
+        # point_id is the Qdrant internal ID returned by search algorithms
+        point_ids = [r.point_id for r in search_results if r.point_id]
+
+        if point_ids:
+            # Single batch retrieve call instead of N sequential scroll calls
+            # This is ~50x faster for 50 results (1 HTTP request vs 50)
+            with trace_operation(
+                "vector_viz.vector_retrieve",
+                attributes={"retrieve.num_points": len(point_ids)},
+            ):
+                points_response = await qdrant_client.retrieve(
+                    collection_name=settings.get_collection_name(),
+                    ids=point_ids,
+                    with_vectors=["dense"],
+                    with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
+                )
+
+            # Build chunk_vectors_map from batch response
+            for point in points_response:
+                if point.vector is not None:
+                    # Extract dense vector (handle both named and unnamed vectors)
+                    if isinstance(point.vector, dict):
+                        vector = point.vector.get("dense")
+                    else:
+                        vector = point.vector
+
+                    if vector is not None and point.payload:
+                        doc_id = point.payload.get("doc_id")
+                        chunk_start = point.payload.get("chunk_start_offset")
+                        chunk_end = point.payload.get("chunk_end_offset")
+                        chunk_key = (doc_id, chunk_start, chunk_end)
+                        chunk_vectors_map[chunk_key] = vector
+
+        vector_fetch_duration = time.perf_counter() - vector_fetch_start
+
+        if len(chunk_vectors_map) < 2:
+            # Not enough chunks for PCA
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [
+                        {
+                            "id": r.id,
+                            "doc_type": r.doc_type,
+                            "title": r.title,
+                            "excerpt": r.excerpt,
+                            "score": r.score,
+                        }
+                        for r in search_results
+                    ],
+                    "coordinates_3d": [[0, 0, 0]] * len(search_results),
+                    "query_coords": [0, 0, 0],
+                    "message": "Not enough chunks for PCA",
+                }
+            )
+
+        # Detect embedding dimension from first available vector
+        embedding_dim = None
+        for vector in chunk_vectors_map.values():
+            if vector is not None:
+                embedding_dim = len(vector)
+                break
+
+        if embedding_dim is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "Could not determine embedding dimension",
+                },
+                status_code=500,
+            )
+
+        logger.info(f"Detected embedding dimension: {embedding_dim}")
+
+        # Build chunk vectors array in search_results order (1:1 mapping)
+        chunk_vectors = []
+        for result in search_results:
+            chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset)
+            if chunk_key in chunk_vectors_map:
+                chunk_vectors.append(chunk_vectors_map[chunk_key])
+            else:
+                # Chunk not found in vectors (shouldn't happen)
+                logger.warning(
+                    f"Chunk {chunk_key} not found in fetched vectors, using zero vector"
+                )
+                # Use zero vector as fallback
+                chunk_vectors.append(np.zeros(embedding_dim))
+
+        chunk_vectors = np.array(chunk_vectors)
+
+        # Reuse query embedding from search algorithm (avoids redundant embedding call)
+        query_embed_start = time.perf_counter()
+        if search_algo.query_embedding is not None:
+            query_embedding = search_algo.query_embedding
+            logger.info(
+                f"Reusing query embedding from search algorithm "
+                f"(dimension={len(query_embedding)})"
+            )
+        else:
+            # Fallback: generate embedding if not available from search
+            from nextcloud_mcp_server.embedding.service import get_embedding_service
+
+            embedding_service = get_embedding_service()
+            query_embedding = await embedding_service.embed(query)
+            logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
+        query_embed_duration = time.perf_counter() - query_embed_start
+
+        # Combine query vector with chunk vectors for PCA
+        # Query will be the last point in the array
+        all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
+
+        # Normalize vectors to unit length (L2 normalization)
+        # This is critical because Qdrant uses COSINE distance, which only measures
+        # vector direction (angle), not magnitude. PCA uses Euclidean distance which
+        # considers both direction and magnitude. By normalizing to unit length,
+        # Euclidean distances in PCA space will match cosine distances.
+        norms = np.linalg.norm(all_vectors, axis=1, keepdims=True)
+
+        # Check for zero-norm vectors (can happen with empty/corrupted embeddings)
+        zero_norm_mask = norms[:, 0] < 1e-10
+        if zero_norm_mask.any():
+            zero_indices = np.where(zero_norm_mask)[0]
+            logger.warning(
+                f"Found {zero_norm_mask.sum()} zero-norm vectors at indices {zero_indices.tolist()}. "
+                "Replacing with small epsilon to avoid division by zero."
+            )
+            # Replace zero norms with small epsilon to avoid NaN
+            norms[zero_norm_mask] = 1e-10
+
+        all_vectors_normalized = all_vectors / norms
+        logger.info(
+            f"Normalized vectors: query_norm={norms[-1][0]:.3f}, "
+            f"doc_norm_range=[{norms[:-1].min():.3f}, {norms[:-1].max():.3f}]"
+        )
+
+        # Apply PCA dimensionality reduction (embedding-dim → 3D) on normalized vectors
+        # Run in thread pool to avoid blocking the event loop (CPU-bound)
+        pca_start = time.perf_counter()
+
+        def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
+            pca = PCA(n_components=3)
+            coords = pca.fit_transform(vectors)
+            return coords, pca
+
+        import anyio
+
+        with trace_operation(
+            "vector_viz.pca_compute",
+            attributes={
+                "pca.num_vectors": len(all_vectors_normalized),
+                "pca.embedding_dim": embedding_dim,
+            },
+        ):
+            # run_sync forwards positional arguments, so no lambda wrapper is needed
+            coords_3d, pca = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+                _compute_pca, all_vectors_normalized
+            )
+        pca_duration = time.perf_counter() - pca_start
+
+        # After fit, these attributes are guaranteed to be set
+        assert pca.explained_variance_ratio_ is not None
+
+        # Check for NaN values in PCA output (numerical instability)
+        nan_mask = np.isnan(coords_3d)
+        if nan_mask.any():
+            nan_rows = np.where(nan_mask.any(axis=1))[0]
+            logger.error(
+                f"Found NaN values in PCA output at {len(nan_rows)} points: {nan_rows.tolist()[:10]}. "
+                "Replacing NaN with 0.0 to prevent JSON serialization error."
+            )
+            # Replace NaN with 0 to allow JSON serialization
+            coords_3d = np.nan_to_num(coords_3d, nan=0.0)
+
+        # Split query coords from chunk coords
+        # Round to 2 decimal places for cleaner display
+        query_coords_3d = [
+            round(float(x), 2) for x in coords_3d[-1]
+        ]  # Last point is query
+        chunk_coords_3d = coords_3d[:-1]  # All but last are chunks
+
+        logger.info(
+            f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, "
+            f"PC2={pca.explained_variance_ratio_[1]:.3f}, "
+            f"PC3={pca.explained_variance_ratio_[2]:.3f}"
+        )
+        logger.info(
+            f"Embedding stats: chunks={len(chunk_vectors)}, "
+            f"query_dim={len(query_embedding)}, chunk_vector_dim={chunk_vectors.shape[1] if chunk_vectors.size > 0 else 0}"
+        )
+
+        # Coordinates already match search_results order (1:1 mapping)
+        result_coords = [
+            [round(float(x), 2) for x in coord] for coord in chunk_coords_3d
+        ]
+
+        # Build response
+        response_results = [
+            {
+                "id": r.id,
+                "doc_type": r.doc_type,
+                "title": r.title,
+                "excerpt": r.excerpt,
+                "score": r.score,  # Normalized score for visual encoding (0-1)
+                "original_score": getattr(
+                    r, "original_score", r.score
+                ),  # Raw score from algorithm
+                "chunk_start_offset": r.chunk_start_offset,
+                "chunk_end_offset": r.chunk_end_offset,
+            }
+            for r in search_results
+        ]
+
+        # Calculate total request duration
+        total_duration = time.perf_counter() - request_start
+
+        # Log comprehensive timing metrics
+        logger.info(
+            f"Viz search timing: total={total_duration * 1000:.1f}ms, "
+            f"search={search_duration * 1000:.1f}ms ({search_duration / total_duration * 100:.1f}%), "
+            f"vector_fetch={vector_fetch_duration * 1000:.1f}ms ({vector_fetch_duration / total_duration * 100:.1f}%), "
+            f"query_embed={query_embed_duration * 1000:.1f}ms ({query_embed_duration / total_duration * 100:.1f}%), "
+            f"pca={pca_duration * 1000:.1f}ms ({pca_duration / total_duration * 100:.1f}%), "
+            f"results={len(search_results)}, chunk_vectors={len(chunk_vectors)}"
+        )
+
+        return JSONResponse(
+            {
+                "success": True,
+                "results": response_results,
+                "coordinates_3d": result_coords[: len(search_results)],
+                "query_coords": query_coords_3d,
+                "pca_variance": {
+                    "pc1": float(pca.explained_variance_ratio_[0]),
+                    "pc2": float(pca.explained_variance_ratio_[1]),
+                    "pc3": float(pca.explained_variance_ratio_[2]),
+                },
+                "timing": {
+                    "total_ms": round(total_duration * 1000, 2),
+                    "search_ms": round(search_duration * 1000, 2),
+                    "vector_fetch_ms": round(vector_fetch_duration * 1000, 2),
+                    "query_embed_ms": round(query_embed_duration * 1000, 2),
+                    "pca_ms": round(pca_duration * 1000, 2),
+                    "num_results": len(search_results),
+                    "num_chunk_vectors": len(chunk_vectors),
+                },
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Viz search error: {e}", exc_info=True)
+        return JSONResponse(
+            {"success": False, "error": str(e)},
+            status_code=500,
+        )
+
+
+@requires("authenticated", redirect="oauth_login")
+async def chunk_context_endpoint(request: Request) -> JSONResponse:
+    """Return a matched chunk together with the text surrounding it.
+
+    The viz pane calls this endpoint to show a search hit inline. For
+    documents of type "file" the response may additionally carry a
+    highlighted page image looked up in Qdrant.
+
+    Query parameters:
+        doc_type: Document type (e.g., "note")
+        doc_id: Document ID
+        start: Chunk start offset (character position)
+        end: Chunk end offset (character position)
+        context: Characters of context before/after (default: 500)
+
+    Returns:
+        JSON with chunk_text, before_context, after_context, and flags.
+        400 for missing or non-numeric parameters, 404 when no context could
+        be fetched, 500 for any other failure.
+    """
+    try:
+        params = request.query_params
+        doc_type = params.get("doc_type")
+        doc_id = params.get("doc_id")
+        start_str = params.get("start")
+        end_str = params.get("end")
+        context_chars = int(params.get("context", "500"))
+
+        # Every mandatory parameter must be present and non-empty; this guard
+        # also guarantees none of them is None from here on.
+        if not (doc_type and doc_id and start_str and end_str):
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "Missing required parameters: doc_type, doc_id, start, end",
+                },
+                status_code=400,
+            )
+
+        # Numeric conversions; a ValueError here is reported as 400 below
+        start = int(start_str)
+        end = int(end_str)
+        # All document types use int IDs
+        doc_id_int = int(doc_id)
+
+        # Imported lazily inside the handler
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            _get_authenticated_client_for_userinfo,
+        )
+        from nextcloud_mcp_server.search.context import get_chunk_with_context
+
+        # Fetch the chunk and its neighbourhood through an authenticated client
+        client_ctx = await _get_authenticated_client_for_userinfo(request)
+        async with client_ctx as nc_client:
+            chunk_context = await get_chunk_with_context(
+                nc_client=nc_client,
+                user_id=request.user.display_name,  # User ID from auth
+                doc_id=doc_id_int,
+                doc_type=doc_type,
+                chunk_start=start,
+                chunk_end=end,
+                context_chars=context_chars,
+            )
+
+        # None signals that the context expansion did not succeed
+        if chunk_context is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": f"Failed to fetch chunk context for {doc_type} {doc_id}",
+                },
+                status_code=404,
+            )
+
+        logger.info(
+            f"Fetched chunk context for {doc_type}_{doc_id}: "
+            f"chunk_len={len(chunk_context.chunk_text)}, "
+            f"before_len={len(chunk_context.before_context)}, "
+            f"after_len={len(chunk_context.after_context)}"
+        )
+
+        # Best-effort lookup of the highlighted page image (files only);
+        # any failure is logged and the text-only response is still returned.
+        highlighted_page_image = None
+        page_number = None
+        if doc_type == "file":
+            try:
+                from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+                settings = get_settings()
+                qdrant_client = await get_qdrant_client()
+                username = request.user.display_name
+
+                # Exact-match conditions identifying this user's chunk
+                exact_matches = (
+                    ("doc_id", doc_id_int),
+                    ("user_id", username),
+                    ("chunk_start_offset", start),
+                    ("chunk_end_offset", end),
+                )
+                conditions = [get_placeholder_filter()]
+                conditions.extend(
+                    FieldCondition(key=field, match=MatchValue(value=wanted))
+                    for field, wanted in exact_matches
+                )
+
+                scroll_result = await qdrant_client.scroll(
+                    collection_name=settings.get_collection_name(),
+                    scroll_filter=Filter(must=conditions),
+                    limit=1,
+                    with_vectors=False,
+                    with_payload=["highlighted_page_image", "page_number"],
+                )
+
+                # First element of the scroll result holds the matching points
+                matches = scroll_result[0]
+                if matches and matches[0].payload:
+                    payload = matches[0].payload
+                    highlighted_page_image = payload.get("highlighted_page_image")
+                    page_number = payload.get("page_number")
+                    if highlighted_page_image:
+                        logger.info(
+                            f"Found highlighted image for chunk: "
+                            f"page={page_number}, image_size={len(highlighted_page_image)}"
+                        )
+            except Exception as e:
+                logger.warning(f"Failed to fetch highlighted image: {e}")
+
+        # Response shape expected by the frontend
+        response_data: dict = {
+            "success": True,
+            "chunk_text": chunk_context.chunk_text,
+            "before_context": chunk_context.before_context,
+            "after_context": chunk_context.after_context,
+            "has_more_before": chunk_context.has_before_truncation,
+            "has_more_after": chunk_context.has_after_truncation,
+        }
+
+        # Image fields are only included when an image was found
+        if highlighted_page_image:
+            response_data.update(
+                highlighted_page_image=highlighted_page_image,
+                page_number=page_number,
+            )
+
+        return JSONResponse(response_data)
+
+    except ValueError as e:
+        logger.error(f"Invalid parameter format: {e}")
+        return JSONResponse(
+            {"success": False, "error": f"Invalid parameter format: {e}"},
+            status_code=400,
+        )
+    except Exception as e:
+        logger.error(f"Chunk context error: {e}", exc_info=True)
+        return JSONResponse(
+            {"success": False, "error": str(e)},
+            status_code=500,
+        )
@@ -0,0 +1,541 @@
+"""Webhook management routes for admin UI.
+
+Provides browser-based endpoints for admin users to manage webhook configurations
+using preset templates. Only accessible to Nextcloud administrators.
+"""
+
+import html
+import logging
+import os
+
+import httpx
+from starlette.authentication import requires
+from starlette.requests import Request
+from starlette.responses import HTMLResponse
+
+from nextcloud_mcp_server.auth.permissions import is_nextcloud_admin
+from nextcloud_mcp_server.client.webhooks import WebhooksClient
+from nextcloud_mcp_server.server.webhook_presets import (
+    WEBHOOK_PRESETS,
+    filter_presets_by_installed_apps,
+    get_preset,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def _get_storage(request: Request):
+    """Locate the storage backend attached to the application state.
+
+    ``request.app.state.storage`` is consulted first. When it is missing or
+    falsy, the ``"storage"`` entry of ``request.app.state.oauth_context`` is
+    used instead (if such a context exists).
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        The storage object that was found (expected to be a
+        RefreshTokenStorage), otherwise the falsy value read from the app
+        state (None when nothing is configured)
+    """
+    state = request.app.state
+
+    # Storage attached directly to the app state (used by the /app routes)
+    direct = getattr(state, "storage", None)
+    if direct:
+        return direct
+
+    # Otherwise look inside the OAuth context, if there is one
+    context = getattr(state, "oauth_context", None)
+    if not context:
+        return direct
+    return context.get("storage")
+
+
+async def _get_installed_apps(http_client: httpx.AsyncClient) -> list[str]:
+    """Derive the installed app names from the Nextcloud capabilities endpoint.
+
+    Every top-level key of the OCS ``capabilities`` mapping is treated as an
+    app name, except for the ``version`` and ``core`` entries. Any failure
+    (transport error, HTTP error status, unexpected payload shape) is logged
+    as a warning and reported as "no apps".
+
+    Args:
+        http_client: Authenticated HTTP client
+
+    Returns:
+        Sorted list of app names (e.g., ["calendar", "forms", "notes"]), or an
+        empty list when the lookup fails
+    """
+    # Capability entries that describe Nextcloud itself rather than an app
+    non_app_keys = {"version", "core"}
+
+    try:
+        reply = await http_client.get(
+            "/ocs/v2.php/cloud/capabilities",
+            headers={"OCS-APIRequest": "true", "Accept": "application/json"},
+        )
+        reply.raise_for_status()
+
+        # Walk ocs -> data -> capabilities, tolerating missing levels
+        capabilities = (
+            reply.json().get("ocs", {}).get("data", {}).get("capabilities", {})
+        )
+        return sorted(
+            name for name in capabilities.keys() if name not in non_app_keys
+        )
+    except Exception as e:
+        logger.warning(f"Failed to get installed apps from capabilities: {e}")
+        return []
+
+
+def _get_webhook_uri() -> str:
+    """Get the webhook endpoint URI for this MCP server.
+
+    This function determines the correct webhook URL based on the environment:
+    1. Uses WEBHOOK_INTERNAL_URL if explicitly set (highest priority)
+    2. Detects Docker/Podman environment and uses the internal service name
+       (NEXTCLOUD_MCP_SERVICE_NAME, default "mcp") and port
+       (NEXTCLOUD_MCP_PORT, default "8000")
+    3. Falls back to NEXTCLOUD_MCP_SERVER_URL (default http://localhost:8000)
+
+    In Docker environments, Nextcloud needs to reach the MCP service using
+    the internal Docker network hostname (e.g., http://mcp:8000), not localhost.
+
+    Trailing slashes on a configured base URL are removed before the endpoint
+    path is appended, so "http://mcp:8000/" and "http://mcp:8000" produce the
+    same URI instead of one of them containing "//webhooks/nextcloud".
+
+    Returns:
+        Full webhook endpoint URL accessible from Nextcloud
+    """
+    endpoint_path = "/webhooks/nextcloud"
+
+    # Explicit override (highest priority)
+    webhook_url = os.getenv("WEBHOOK_INTERNAL_URL")
+    if webhook_url:
+        return f"{webhook_url.rstrip('/')}{endpoint_path}"
+
+    # Detect Docker environment
+    # Check for common Docker indicators
+    is_docker = (
+        os.path.exists("/.dockerenv")  # Docker container marker file
+        or os.path.exists("/run/.containerenv")  # Podman marker
+        or os.getenv("DOCKER_CONTAINER") == "true"  # Explicit flag
+    )
+
+    if is_docker:
+        # In Docker, use internal service name from NEXTCLOUD_MCP_SERVICE_NAME
+        # or default to 'mcp' (docker-compose service name)
+        service_name = os.getenv("NEXTCLOUD_MCP_SERVICE_NAME", "mcp")
+        port = os.getenv("NEXTCLOUD_MCP_PORT", "8000")
+        logger.debug(
+            f"Docker environment detected, using internal URL: http://{service_name}:{port}"
+        )
+        return f"http://{service_name}:{port}{endpoint_path}"
+
+    # Fallback to configured server URL (for non-Docker deployments)
+    server_url = os.getenv("NEXTCLOUD_MCP_SERVER_URL", "http://localhost:8000")
+    return f"{server_url.rstrip('/')}{endpoint_path}"
+
+
+async def _get_authenticated_client(request: Request) -> httpx.AsyncClient:
+    """Build an httpx client that can call the Nextcloud API for this request.
+
+    The mode is selected by ``request.app.state.oauth_context``:
+
+    - OAuth context present: Bearer authentication using the access token
+      stored for the ``mcp_session`` cookie
+    - no OAuth context: BasicAuth using NEXTCLOUD_HOST, NEXTCLOUD_USERNAME and
+      NEXTCLOUD_PASSWORD from the environment
+
+    A new client is constructed on every call; it is not closed here.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        Authenticated httpx.AsyncClient
+
+    Raises:
+        RuntimeError: If the BasicAuth environment variables are incomplete,
+            or (OAuth mode) the session, access token or Nextcloud host
+            cannot be determined
+    """
+    oauth_ctx = getattr(request.app.state, "oauth_context", None)
+
+    if oauth_ctx:
+        # OAuth mode - the access token is looked up via the session cookie
+        storage = oauth_ctx.get("storage")
+        session_id = request.cookies.get("mcp_session")
+        if not (storage and session_id):
+            raise RuntimeError("Session not found")
+
+        token_data = await storage.get_refresh_token(session_id)
+        if not (token_data and "access_token" in token_data):
+            raise RuntimeError("No access token found in session")
+        bearer = token_data["access_token"]
+
+        target_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+        if not target_host:
+            raise RuntimeError("Nextcloud host not configured")
+
+        return httpx.AsyncClient(
+            base_url=target_host,
+            headers={"Authorization": f"Bearer {bearer}"},
+            timeout=30.0,
+        )
+
+    # BasicAuth mode - credentials come from the environment
+    env_host = os.getenv("NEXTCLOUD_HOST")
+    env_user = os.getenv("NEXTCLOUD_USERNAME")
+    env_password = os.getenv("NEXTCLOUD_PASSWORD")
+
+    # All three values must be present and non-empty
+    if not (env_host and env_user and env_password):
+        raise RuntimeError("BasicAuth credentials not configured")
+
+    return httpx.AsyncClient(
+        base_url=env_host,
+        auth=(env_user, env_password),
+        timeout=30.0,
+    )
+
+
+async def _get_enabled_presets(
+    webhooks_client: WebhooksClient,
+    storage=None,
+) -> dict[str, list[int]]:
+    """Group the currently registered webhooks by preset.
+
+    When a storage object is supplied, the grouping is read from it and the
+    webhooks API is not contacted. Only when no storage is given are the
+    registered webhooks listed through the API and matched against every
+    entry of WEBHOOK_PRESETS: a webhook belongs to a preset when its URI
+    equals this server's webhook URI and its event is one of the preset's
+    events.
+
+    Any exception raised along the way is logged and results in an empty
+    mapping.
+
+    Args:
+        webhooks_client: Webhooks API client
+        storage: Optional RefreshTokenStorage instance
+
+    Returns:
+        Dictionary mapping preset_id to list of webhook IDs
+    """
+    try:
+        if storage:
+            # Database-backed path: rows already carry their preset id
+            grouped: dict[str, list[int]] = {}
+            for row in await storage.list_all_webhooks():
+                grouped.setdefault(row["preset_id"], []).append(row["webhook_id"])
+            return grouped
+
+        # API path: infer preset membership from URI and event class
+        registered = await webhooks_client.list_webhooks()
+        own_uri = _get_webhook_uri()
+
+        inferred: dict[str, list[int]] = {}
+        for preset_id, preset in WEBHOOK_PRESETS.items():
+            event_classes = {entry["event"] for entry in preset["events"]}
+            matching_ids = [
+                hook["id"]
+                for hook in registered
+                if hook.get("uri") == own_uri and hook.get("event") in event_classes
+            ]
+            # Presets without any registered webhook are left out entirely
+            if matching_ids:
+                inferred[preset_id] = matching_ids
+        return inferred
+
+    except Exception as e:
+        logger.error(f"Failed to list webhooks: {e}")
+        return {}
+
+
+@requires("authenticated", redirect="oauth_login")
+async def webhook_management_pane(request: Request) -> HTMLResponse:
+    """Webhook management pane - returns HTML for webhook configuration.
+
+    This endpoint checks if the user is an admin and returns either:
+    - Admin view: Webhook management interface with preset controls
+    - Non-admin view: Message indicating admin-only access
+
+    The HTTP client created for the request is always closed before the
+    handler finishes, and exception text shown in the error fragment is
+    HTML-escaped.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML response with the webhook management interface, the access denied
+        message, or an error fragment (status 500) when loading fails
+    """
+    # Tracked outside the try block so the finally clause can always release it
+    http_client = None
+
+    try:
+        # Get authenticated HTTP client
+        http_client = await _get_authenticated_client(request)
+        username = request.user.display_name
+
+        # Check admin permissions
+        is_admin = await is_nextcloud_admin(request, http_client)
+
+        if not is_admin:
+            return HTMLResponse(
+                content="""
+                <div class="info-message">
+                    <p><strong>Admin Access Required</strong></p>
+                    <p>Webhook management is only available to Nextcloud administrators.</p>
+                    <p>Your account does not have admin privileges.</p>
+                </div>
+                """
+            )
+
+        # Get webhooks client
+        webhooks_client = WebhooksClient(http_client, username)
+
+        # Get storage for database-backed webhook tracking
+        storage = _get_storage(request)
+
+        # Get installed apps to filter presets
+        installed_apps = await _get_installed_apps(http_client)
+        logger.debug(f"Installed apps: {installed_apps}")
+
+        # Get currently enabled presets (from database or API)
+        enabled_presets = await _get_enabled_presets(webhooks_client, storage)
+
+        # Filter presets based on installed apps
+        available_presets = filter_presets_by_installed_apps(installed_apps)
+
+        # Build preset cards HTML
+        preset_cards_html = ""
+        for preset_id, preset in available_presets:
+            is_enabled = preset_id in enabled_presets
+            num_webhooks = len(enabled_presets.get(preset_id, []))
+
+            # Status badge and the htmx button that toggles the preset
+            if is_enabled:
+                status_badge = f'<span style="color: #4caf50; font-weight: bold;">✓ Enabled ({num_webhooks} webhooks)</span>'
+                action_button = f"""
+                <button
+                    hx-delete="/app/webhooks/disable/{preset_id}"
+                    hx-target="#preset-{preset_id}"
+                    hx-swap="outerHTML"
+                    class="button"
+                    style="background-color: #ff9800;">
+                    Disable
+                </button>
+                """
+            else:
+                status_badge = '<span style="color: #999;">Not Enabled</span>'
+                action_button = f"""
+                <button
+                    hx-post="/app/webhooks/enable/{preset_id}"
+                    hx-target="#preset-{preset_id}"
+                    hx-swap="outerHTML"
+                    class="button button-primary">
+                    Enable
+                </button>
+                """
+
+            preset_cards_html += f"""
+            <div id="preset-{preset_id}" style="border: 1px solid #e0e0e0; border-radius: 6px; padding: 20px; margin: 15px 0;">
+                <h3 style="margin-top: 0; color: #0082c9;">{preset["name"]}</h3>
+                <p style="color: #666; margin: 10px 0;">{preset["description"]}</p>
+                <p style="font-size: 13px; color: #999;">
+                    <strong>App:</strong> {preset["app"]} |
+                    <strong>Events:</strong> {len(preset["events"])}
+                </p>
+                <div style="margin-top: 15px; display: flex; align-items: center; gap: 15px;">
+                    <div>{status_badge}</div>
+                    <div>{action_button}</div>
+                </div>
+            </div>
+            """
+
+        # Get webhook endpoint URL for display
+        webhook_uri = _get_webhook_uri()
+
+        html_content = f"""
+        <h2>Webhook Management</h2>
+        <div class="info-message">
+            <p><strong>About Webhooks</strong></p>
+            <p>Webhooks enable real-time synchronization by notifying this server when content changes in Nextcloud.</p>
+            <p><strong>Endpoint:</strong> <code>{webhook_uri}</code></p>
+        </div>
+
+        <h3 style="margin-top: 30px;">Available Presets</h3>
+        <p style="color: #666;">Enable webhook presets with one click for common synchronization scenarios.</p>
+        <p style="color: #999; font-size: 13px; margin-top: 5px;">Showing {len(available_presets)} preset(s) for your installed apps ({len(installed_apps)} detected)</p>
+
+        {preset_cards_html}
+        """
+
+        return HTMLResponse(content=html_content)
+
+    except Exception as e:
+        logger.error(f"Error loading webhook management pane: {e}", exc_info=True)
+        # Exception text may contain markup-significant characters (e.g. the
+        # repr of a response object), so escape it before embedding in HTML
+        return HTMLResponse(
+            content=f"""
+            <div class="warning">
+                <p><strong>Error Loading Webhooks</strong></p>
+                <p>{html.escape(str(e))}</p>
+            </div>
+            """,
+            status_code=500,
+        )
+    finally:
+        # _get_authenticated_client builds a new httpx.AsyncClient per call;
+        # close it so its connection pool is not leaked on every page load
+        if http_client is not None:
+            await http_client.aclose()
+
+
+@requires("authenticated", redirect="oauth_login")
+async def enable_webhook_preset(request: Request) -> HTMLResponse:
+    """Enable a webhook preset by registering all webhooks.
+
+    Each webhook is recorded in storage (when storage is available) directly
+    after it has been registered, so a failure part-way through the preset
+    does not leave already-registered webhooks untracked. The HTTP client
+    created for the request is always closed, and request-derived or
+    exception text placed into HTML is escaped.
+
+    Args:
+        request: Starlette request object (preset_id in path)
+
+    Returns:
+        HTML response with updated preset card, or a warning fragment with
+        status 403 (not an admin), 404 (unknown preset) or 500 (failure)
+    """
+    preset_id = request.path_params["preset_id"]
+    # Tracked outside the try block so the finally clause can always release it
+    http_client = None
+
+    try:
+        # Get authenticated HTTP client
+        http_client = await _get_authenticated_client(request)
+        username = request.user.display_name
+
+        # Check admin permissions
+        is_admin = await is_nextcloud_admin(request, http_client)
+        if not is_admin:
+            return HTMLResponse(
+                content='<div class="warning">Admin access required</div>',
+                status_code=403,
+            )
+
+        # Get preset configuration
+        preset = get_preset(preset_id)
+        if not preset:
+            # preset_id comes straight from the URL path; escape before echoing
+            return HTMLResponse(
+                content=f'<div class="warning">Unknown preset: {html.escape(str(preset_id))}</div>',
+                status_code=404,
+            )
+
+        # Register webhooks
+        webhooks_client = WebhooksClient(http_client, username)
+        webhook_uri = _get_webhook_uri()
+        storage = _get_storage(request)
+        registered_ids = []
+
+        for event_config in preset["events"]:
+            webhook_data = await webhooks_client.create_webhook(
+                event=event_config["event"],
+                uri=webhook_uri,
+                event_filter=event_config["filter"] or None,
+            )
+            webhook_id = webhook_data["id"]
+            registered_ids.append(webhook_id)
+            logger.info(f"Registered webhook {webhook_id} for {event_config['event']}")
+
+            # Persist immediately: if a later registration raises, this
+            # webhook is still known to the database and can be disabled
+            if storage:
+                await storage.store_webhook(webhook_id, preset_id)
+
+        if storage:
+            logger.info(
+                f"Persisted {len(registered_ids)} webhook(s) for preset '{preset_id}' to database"
+            )
+
+        # Return updated card
+        num_webhooks = len(registered_ids)
+        return HTMLResponse(
+            content=f"""
+            <div id="preset-{preset_id}" style="border: 1px solid #e0e0e0; border-radius: 6px; padding: 20px; margin: 15px 0;">
+                <h3 style="margin-top: 0; color: #0082c9;">{preset["name"]}</h3>
+                <p style="color: #666; margin: 10px 0;">{preset["description"]}</p>
+                <p style="font-size: 13px; color: #999;">
+                    <strong>App:</strong> {preset["app"]} |
+                    <strong>Events:</strong> {len(preset["events"])}
+                </p>
+                <div style="margin-top: 15px; display: flex; align-items: center; gap: 15px;">
+                    <div><span style="color: #4caf50; font-weight: bold;">✓ Enabled ({num_webhooks} webhooks)</span></div>
+                    <div>
+                        <button
+                            hx-delete="/app/webhooks/disable/{preset_id}"
+                            hx-target="#preset-{preset_id}"
+                            hx-swap="outerHTML"
+                            class="button"
+                            style="background-color: #ff9800;">
+                            Disable
+                        </button>
+                    </div>
+                </div>
+            </div>
+            """
+        )
+
+    except Exception as e:
+        logger.error(f"Failed to enable preset {preset_id}: {e}", exc_info=True)
+        return HTMLResponse(
+            content=f'<div class="warning">Failed to enable preset: {html.escape(str(e))}</div>',
+            status_code=500,
+        )
+    finally:
+        # _get_authenticated_client builds a new httpx.AsyncClient per call;
+        # close it so its connection pool is not leaked on every request
+        if http_client is not None:
+            await http_client.aclose()
+
+
+@requires("authenticated", redirect="oauth_login")
+async def disable_webhook_preset(request: Request) -> HTMLResponse:
+    """Disable a webhook preset by deleting all registered webhooks.
+
+    Webhook IDs are taken from storage when it is available, otherwise they
+    are inferred through the webhooks API. The HTTP client created for the
+    request is always closed, and request-derived or exception text placed
+    into HTML is escaped.
+
+    Args:
+        request: Starlette request object (preset_id in path)
+
+    Returns:
+        HTML response with updated preset card, or a warning fragment with
+        status 403 (not an admin), 404 (unknown preset) or 500 (failure)
+    """
+    preset_id = request.path_params["preset_id"]
+    # Tracked outside the try block so the finally clause can always release it
+    http_client = None
+
+    try:
+        # Get authenticated HTTP client
+        http_client = await _get_authenticated_client(request)
+        username = request.user.display_name
+
+        # Check admin permissions
+        is_admin = await is_nextcloud_admin(request, http_client)
+        if not is_admin:
+            return HTMLResponse(
+                content='<div class="warning">Admin access required</div>',
+                status_code=403,
+            )
+
+        # Get preset configuration
+        preset = get_preset(preset_id)
+        if not preset:
+            # preset_id comes straight from the URL path; escape before echoing
+            return HTMLResponse(
+                content=f'<div class="warning">Unknown preset: {html.escape(str(preset_id))}</div>',
+                status_code=404,
+            )
+
+        # Find and delete matching webhooks
+        webhooks_client = WebhooksClient(http_client, username)
+
+        # Get webhook IDs from database first (more reliable)
+        storage = _get_storage(request)
+        if storage:
+            webhook_ids = await storage.get_webhooks_by_preset(preset_id)
+        else:
+            # Fallback to API query if storage not available
+            enabled_presets = await _get_enabled_presets(webhooks_client)
+            webhook_ids = enabled_presets.get(preset_id, [])
+
+        for webhook_id in webhook_ids:
+            await webhooks_client.delete_webhook(webhook_id)
+            logger.info(f"Deleted webhook {webhook_id} from preset {preset_id}")
+
+        # Remove from database
+        if storage:
+            deleted_count = await storage.clear_preset_webhooks(preset_id)
+            logger.info(
+                f"Removed {deleted_count} webhook(s) for preset '{preset_id}' from database"
+            )
+
+        # Return updated card
+        return HTMLResponse(
+            content=f"""
+            <div id="preset-{preset_id}" style="border: 1px solid #e0e0e0; border-radius: 6px; padding: 20px; margin: 15px 0;">
+                <h3 style="margin-top: 0; color: #0082c9;">{preset["name"]}</h3>
+                <p style="color: #666; margin: 10px 0;">{preset["description"]}</p>
+                <p style="font-size: 13px; color: #999;">
+                    <strong>App:</strong> {preset["app"]} |
+                    <strong>Events:</strong> {len(preset["events"])}
+                </p>
+                <div style="margin-top: 15px; display: flex; align-items: center; gap: 15px;">
+                    <div><span style="color: #999;">Not Enabled</span></div>
+                    <div>
+                        <button
+                            hx-post="/app/webhooks/enable/{preset_id}"
+                            hx-target="#preset-{preset_id}"
+                            hx-swap="outerHTML"
+                            class="button button-primary">
+                            Enable
+                        </button>
+                    </div>
+                </div>
+            </div>
+            """
+        )
+
+    except Exception as e:
+        logger.error(f"Failed to disable preset {preset_id}: {e}", exc_info=True)
+        return HTMLResponse(
+            content=f'<div class="warning">Failed to disable preset: {html.escape(str(e))}</div>',
+            status_code=500,
+        )
+    finally:
+        # _get_authenticated_client builds a new httpx.AsyncClient per call;
+        # close it so its connection pool is not leaked on every request
+        if http_client is not None:
+            await http_client.aclose()
@@ -0,0 +1,257 @@
+import os
+
+import click
+import uvicorn
+
+from nextcloud_mcp_server.config import (
+    get_settings,
+)
+from nextcloud_mcp_server.observability import get_uvicorn_logging_config
+
+from .app import get_app
+
+
+@click.command()
+@click.option(
+    "--host", "-h", default="127.0.0.1", show_default=True, help="Server host"
+)
+@click.option(
+    "--port", "-p", type=int, default=8000, show_default=True, help="Server port"
+)
+@click.option(
+    "--log-level",
+    "-l",
+    default="info",
+    show_default=True,
+    type=click.Choice(["critical", "error", "warning", "info", "debug", "trace"]),
+    help="Logging level",
+)
+@click.option(
+    "--transport",
+    "-t",
+    default="streamable-http",
+    show_default=True,
+    type=click.Choice(["streamable-http", "http"]),
+    help="MCP transport protocol",
+)
+@click.option(
+    "--enable-app",
+    "-e",
+    multiple=True,
+    type=click.Choice(
+        ["notes", "tables", "webdav", "calendar", "contacts", "cookbook", "deck"]
+    ),
+    help="Enable specific Nextcloud app APIs. Can be specified multiple times. If not specified, all apps are enabled.",
+)
+@click.option(
+    "--oauth/--no-oauth",
+    default=None,
+    help="Force OAuth mode (if enabled) or BasicAuth mode (if disabled). By default, auto-detected based on environment variables.",
+)
+@click.option(
+    "--oauth-client-id",
+    envvar="NEXTCLOUD_OIDC_CLIENT_ID",
+    help="OAuth client ID (can also use NEXTCLOUD_OIDC_CLIENT_ID env var)",
+)
+@click.option(
+    "--oauth-client-secret",
+    envvar="NEXTCLOUD_OIDC_CLIENT_SECRET",
+    help="OAuth client secret (can also use NEXTCLOUD_OIDC_CLIENT_SECRET env var)",
+)
+@click.option(
+    "--mcp-server-url",
+    envvar="NEXTCLOUD_MCP_SERVER_URL",
+    default="http://localhost:8000",
+    show_default=True,
+    help="MCP server URL for OAuth callbacks (can also use NEXTCLOUD_MCP_SERVER_URL env var)",
+)
+@click.option(
+    "--nextcloud-host",
+    envvar="NEXTCLOUD_HOST",
+    help="Nextcloud instance URL (can also use NEXTCLOUD_HOST env var)",
+)
+@click.option(
+    "--nextcloud-username",
+    envvar="NEXTCLOUD_USERNAME",
+    help="Nextcloud username for BasicAuth (can also use NEXTCLOUD_USERNAME env var)",
+)
+@click.option(
+    "--nextcloud-password",
+    envvar="NEXTCLOUD_PASSWORD",
+    help="Nextcloud password for BasicAuth (can also use NEXTCLOUD_PASSWORD env var)",
+)
+@click.option(
+    "--oauth-scopes",
+    envvar="NEXTCLOUD_OIDC_SCOPES",
+    default="openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write",
+    show_default=True,
+    help="OAuth scopes to request during client registration. These define the maximum allowed scopes for the client. Note: Actual supported scopes are discovered dynamically from MCP tools at runtime. (can also use NEXTCLOUD_OIDC_SCOPES env var)",
+)
+@click.option(
+    "--oauth-token-type",
+    envvar="NEXTCLOUD_OIDC_TOKEN_TYPE",
+    default="bearer",
+    show_default=True,
+    type=click.Choice(["bearer", "jwt"], case_sensitive=False),
+    help="OAuth token type (can also use NEXTCLOUD_OIDC_TOKEN_TYPE env var)",
+)
+@click.option(
+    "--public-issuer-url",
+    envvar="NEXTCLOUD_PUBLIC_ISSUER_URL",
+    help="Public issuer URL for OAuth (can also use NEXTCLOUD_PUBLIC_ISSUER_URL env var)",
+)
+def run(
+    host: str,
+    port: int,
+    log_level: str,
+    transport: str,
+    enable_app: tuple[str, ...],
+    oauth: bool | None,
+    oauth_client_id: str | None,
+    oauth_client_secret: str | None,
+    mcp_server_url: str,
+    nextcloud_host: str | None,
+    nextcloud_username: str | None,
+    nextcloud_password: str | None,
+    oauth_scopes: str,
+    oauth_token_type: str,
+    public_issuer_url: str | None,
+):
+    """
+    Run the Nextcloud MCP server.
+
+    \b
+    Authentication Modes:
+      - BasicAuth: Set NEXTCLOUD_USERNAME and NEXTCLOUD_PASSWORD
+      - OAuth: Leave USERNAME/PASSWORD unset (requires OIDC app enabled)
+
+    \b
+    Examples:
+      # BasicAuth mode with CLI options
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com \\
+          --nextcloud-username=admin --nextcloud-password=secret
+
+      # BasicAuth mode with env vars (recommended for credentials)
+      $ export NEXTCLOUD_HOST=https://cloud.example.com
+      $ export NEXTCLOUD_USERNAME=admin
+      $ export NEXTCLOUD_PASSWORD=secret
+      $ nextcloud-mcp-server --host 0.0.0.0 --port 8000
+
+      # OAuth mode with auto-registration
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com --oauth
+
+      # OAuth mode with pre-configured client
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com --oauth \\
+          --oauth-client-id=xxx --oauth-client-secret=yyy
+
+      # OAuth mode with custom scopes and JWT tokens
+      $ nextcloud-mcp-server --nextcloud-host=https://cloud.example.com --oauth \\
+          --oauth-scopes="openid notes:read notes:write" --oauth-token-type=jwt
+
+      # OAuth with public issuer URL (for Docker/proxy setups)
+      $ nextcloud-mcp-server --nextcloud-host=http://app --oauth \\
+          --public-issuer-url=http://localhost:8080
+    """
+    # NOTE: the docstring above is rendered by click as the --help text, so it
+    # is part of the program's output and is kept as-is.
+
+    # Mirror every non-empty CLI value into the environment, where the rest of
+    # the application reads its configuration from.
+    cli_overrides = (
+        ("NEXTCLOUD_HOST", nextcloud_host),
+        ("NEXTCLOUD_USERNAME", nextcloud_username),
+        ("NEXTCLOUD_PASSWORD", nextcloud_password),
+        ("NEXTCLOUD_OIDC_CLIENT_ID", oauth_client_id),
+        ("NEXTCLOUD_OIDC_CLIENT_SECRET", oauth_client_secret),
+        ("NEXTCLOUD_OIDC_SCOPES", oauth_scopes),
+        ("NEXTCLOUD_OIDC_TOKEN_TYPE", oauth_token_type),
+        ("NEXTCLOUD_MCP_SERVER_URL", mcp_server_url),
+        ("NEXTCLOUD_PUBLIC_ISSUER_URL", public_issuer_url),
+    )
+    for env_name, cli_value in cli_overrides:
+        if cli_value:
+            os.environ[env_name] = cli_value
+
+    if oauth is True:
+        # --oauth: drop BasicAuth credentials so OAuth mode is selected
+        ignored_credentials = (
+            (
+                "NEXTCLOUD_USERNAME",
+                "Warning: --oauth flag set, ignoring NEXTCLOUD_USERNAME",
+            ),
+            (
+                "NEXTCLOUD_PASSWORD",
+                "Warning: --oauth flag set, ignoring NEXTCLOUD_PASSWORD",
+            ),
+        )
+        for env_name, warning in ignored_credentials:
+            if env_name in os.environ:
+                click.echo(warning, err=True)
+                del os.environ[env_name]
+
+        # OAuth cannot work without knowing which Nextcloud to talk to
+        nextcloud_host = os.getenv("NEXTCLOUD_HOST")
+        if not nextcloud_host:
+            raise click.ClickException(
+                "OAuth mode requires NEXTCLOUD_HOST environment variable to be set"
+            )
+
+        # Both ID and secret present -> pre-configured client; otherwise the
+        # server will attempt dynamic client registration
+        client_id = os.getenv("NEXTCLOUD_OIDC_CLIENT_ID")
+        client_secret = os.getenv("NEXTCLOUD_OIDC_CLIENT_SECRET")
+
+        if client_id and client_secret:
+            banner = [
+                "",
+                "OAuth Configuration:",
+                "  Mode: Pre-configured Client",
+                "  Host: " + nextcloud_host,
+                # Only a prefix of the client ID is shown
+                "  Client ID: " + client_id[:16] + "...",
+                "",
+            ]
+        else:
+            banner = [
+                "",
+                "OAuth Configuration:",
+                "  Mode: Dynamic Client Registration",
+                "  Host: " + nextcloud_host,
+                "  Storage: SQLite (TOKEN_STORAGE_DB)",
+                "",
+                "Note: Make sure 'Dynamic Client Registration' is enabled",
+                "      in your Nextcloud OIDC app settings.",
+                "",
+            ]
+
+        # Informational output goes to stderr, one line per echo call
+        for banner_line in banner:
+            click.echo(banner_line, err=True)
+
+    elif oauth is False:
+        # --no-oauth: BasicAuth needs both credentials to be available
+        basic_user = os.getenv("NEXTCLOUD_USERNAME")
+        basic_password = os.getenv("NEXTCLOUD_PASSWORD")
+        if not (basic_user and basic_password):
+            raise click.ClickException(
+                "--no-oauth flag set but NEXTCLOUD_USERNAME or NEXTCLOUD_PASSWORD not set"
+            )
+
+    # An empty --enable-app selection means "no restriction" (None)
+    app = get_app(
+        transport=transport,
+        enabled_apps=list(enable_app) if enable_app else None,
+    )
+
+    # The uvicorn logging config is derived from the observability settings;
+    # the --log-level option is passed to uvicorn separately
+    settings = get_settings()
+
+    uvicorn.run(
+        app=app,
+        host=host,
+        port=port,
+        log_level=log_level,
+        log_config=get_uvicorn_logging_config(
+            log_format=settings.log_format,
+            log_level=settings.log_level,
+            include_trace_context=settings.log_include_trace_context,
+        ),
+    )
+
+
+# Script entry point: invoke the click command when this module is executed
+# directly rather than imported.
+if __name__ == "__main__":
+    run()
@@ -9,6 +9,7 @@ from httpx import (
    BasicAuth,
    Request,
    Response,
+    Timeout,
 )

 from ..controllers.notes_search import NotesSearchController
@@ -17,11 +18,13 @@ from .contacts import ContactsClient
 from .cookbook import CookbookClient
 from .deck import DeckClient
 from .groups import GroupsClient
+from .news import NewsClient
 from .notes import NotesClient
 from .sharing import SharingClient
 from .tables import TablesClient
 from .users import UsersClient
 from .webdav import WebDAVClient
+from .webhooks import WebhooksClient

 logger = logging.getLogger(__name__)

@@ -66,6 +69,7 @@ class NextcloudClient:
            auth=auth,
            transport=AsyncDisableCookieTransport(AsyncHTTPTransport()),
            event_hooks={"request": [log_request], "response": [log_response]},
+            timeout=Timeout(timeout=30, connect=5),
        )

        # Initialize app clients
@@ -78,9 +82,11 @@ class NextcloudClient:
        self.contacts = ContactsClient(self._client, username)
        self.cookbook = CookbookClient(self._client, username)
        self.deck = DeckClient(self._client, username)
+        self.news = NewsClient(self._client, username)
        self.users = UsersClient(self._client, username)
        self.groups = GroupsClient(self._client, username)
        self.sharing = SharingClient(self._client, username)
+        self.webhooks = WebhooksClient(self._client, username)

        # Initialize controllers
        self._notes_search = NotesSearchController()
@@ -126,10 +132,75 @@ class NextcloudClient:
        all_notes = self.notes.get_all_notes()
        return await self._notes_search.search_notes(all_notes, query)

+    async def find_files_by_tag(
+        self, tag_name: str, mime_type_filter: str | None = None
+    ) -> list[dict]:
+        """Find files by system tag name, optionally filtered by MIME type.
+
+        This method coordinates tag lookup and file retrieval via WebDAV:
+        1. Look up the tag by name (``self.webdav.get_tag_by_name``)
+        2. Get all files with that tag (``self.webdav.get_files_by_tag``)
+        3. Optionally keep only files whose content type starts with
+           ``mime_type_filter``
+
+        Args:
+            tag_name: Name of the system tag to search for (e.g., "vector-index")
+            mime_type_filter: Optional MIME type prefix (e.g., "application/pdf");
+                matched against each file's ``content_type`` with ``str.startswith``
+
+        Returns:
+            List of file dictionaries as produced by ``self.webdav.get_files_by_tag``.
+            Empty when the tag does not exist, the tag has no ID, or no file
+            carries the tag / passes the filter.
+
+        Raises:
+            httpx.HTTPStatusError: If one of the underlying WebDAV requests
+                returns an error status (propagated from the WebDAV client)
+
+        Examples:
+            # Find all files with "vector-index" tag
+            files = await nc_client.find_files_by_tag("vector-index")
+
+            # Find only PDFs with the tag
+            pdfs = await nc_client.find_files_by_tag("vector-index", "application/pdf")
+        """
+        # Look up tag by name using WebDAV
+        tag = await self.webdav.get_tag_by_name(tag_name)
+        if not tag:
+            logger.debug(f"Tag '{tag_name}' not found, returning empty list")
+            return []
+
+        # get_tag_by_name reports id=None when the server omitted <oc:id>;
+        # querying files for "None" would be meaningless, so stop here.
+        tag_id = tag.get("id")
+        if tag_id is None:
+            logger.warning(f"Tag '{tag_name}' has no ID, returning empty list")
+            return []
+
+        # Get files with this tag (returns full file info from REPORT)
+        files = await self.webdav.get_files_by_tag(tag_id)
+        if not files:
+            logger.debug(f"No files found with tag '{tag_name}'")
+            return []
+
+        logger.debug(f"Found {len(files)} files with tag '{tag_name}'")
+
+        # Apply MIME type filter if specified
+        if mime_type_filter:
+            # content_type may be present but None (empty XML element), so
+            # normalise with "or" instead of relying on the dict.get default.
+            filtered_files = [
+                f
+                for f in files
+                if (f.get("content_type") or "").startswith(mime_type_filter)
+            ]
+            logger.info(
+                f"Returning {len(filtered_files)} files with tag '{tag_name}' (filtered by {mime_type_filter})"
+            )
+            return filtered_files
+
+        logger.info(f"Returning {len(files)} files with tag '{tag_name}'")
+        return files
+
    def _get_webdav_base_path(self) -> str:
        """Return the WebDAV files root for ``self.username``."""
        template = "/remote.php/dav/files/{}"
        return template.format(self.username)

+    async def __aenter__(self):
+        """Async context manager entry; returns this client itself."""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit - awaits ``self.close()``.
+
+        The exception arguments are not inspected. Returning False means an
+        exception raised inside the ``async with`` body is re-raised to the
+        caller after the client has been closed.
+        """
+        await self.close()
+        return False  # Don't suppress exceptions
+
    async def close(self):
        """Close the HTTP client and CalDAV client."""
        await self._client.aclose()
@@ -5,6 +5,7 @@ import time
 from abc import ABC
 from functools import wraps

+import anyio
 from httpx import AsyncClient, HTTPStatusError, RequestError, codes

 from nextcloud_mcp_server.observability.metrics import (
@@ -47,7 +48,7 @@ def retry_on_429(func):
                    # Record retry metric (extract app name from args if available)
                    if len(args) > 0 and hasattr(args[0], "app_name"):
                        record_nextcloud_api_retry(app=args[0].app_name, reason="429")
-                    time.sleep(5)
+                    await anyio.sleep(5)
                elif e.response.status_code == 404:
                    # 404 errors are often expected (e.g., checking if attachments exist)
                    # Log as debug instead of warning
@@ -0,0 +1,385 @@
+"""Client for Nextcloud News app operations."""
+
+import logging
+from enum import IntEnum
+from typing import Any
+
+from .base import BaseNextcloudClient
+
+logger = logging.getLogger(__name__)
+
+
+class NewsItemType(IntEnum):
+    """Type constants for News API item queries.
+
+    Used as the default for the ``type_`` argument of ``NewsClient.get_items``
+    and ``NewsClient.get_updated_items``, which send it as the ``type`` query
+    parameter next to ``id``.
+    """
+
+    FEED = 0  # Single feed
+    FOLDER = 1  # Folder and its feeds
+    STARRED = 2  # All starred items
+    ALL = 3  # All items
+
+
+class NewsClient(BaseNextcloudClient):
+    """Client for Nextcloud News app operations.
+
+    Every method issues one request below ``API_BASE`` through
+    ``self._make_request`` (inherited from ``BaseNextcloudClient``) and either
+    returns the decoded JSON payload (or a part of it) or ``None``.
+    """
+
+    app_name = "news"
+    API_BASE = "/apps/news/api/v1-3"
+
+    # --- Folders ---
+
+    async def get_folders(self) -> list[dict[str, Any]]:
+        """Get all folders.
+
+        Returns:
+            The ``folders`` list of the response, or an empty list if absent
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/folders")
+        return response.json().get("folders", [])
+
+    async def create_folder(self, name: str) -> dict[str, Any]:
+        """Create a new folder.
+
+        Args:
+            name: Folder name
+
+        Returns:
+            Created folder data (first entry of the returned ``folders`` list),
+            or an empty dict if the response contains no folder
+
+        Raises:
+            HTTPStatusError: 409 if folder name already exists,
+                            422 if name is empty
+        """
+        response = await self._make_request(
+            "POST", f"{self.API_BASE}/folders", json={"name": name}
+        )
+        folders = response.json().get("folders", [])
+        return folders[0] if folders else {}
+
+    async def rename_folder(self, folder_id: int, name: str) -> None:
+        """Rename a folder.
+
+        Args:
+            folder_id: Folder ID
+            name: New folder name
+
+        Raises:
+            HTTPStatusError: 404 if folder not found, 409 if name exists
+        """
+        await self._make_request(
+            "PUT", f"{self.API_BASE}/folders/{folder_id}", json={"name": name}
+        )
+
+    async def delete_folder(self, folder_id: int) -> None:
+        """Delete a folder and all its feeds/items.
+
+        Args:
+            folder_id: Folder ID
+
+        Raises:
+            HTTPStatusError: 404 if folder not found
+        """
+        await self._make_request("DELETE", f"{self.API_BASE}/folders/{folder_id}")
+
+    async def mark_folder_read(self, folder_id: int, newest_item_id: int) -> None:
+        """Mark all items in a folder as read.
+
+        Args:
+            folder_id: Folder ID
+            newest_item_id: ID of newest item to mark read (prevents marking
+                           items user hasn't seen yet)
+
+        Raises:
+            HTTPStatusError: 404 if folder not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/folders/{folder_id}/read",
+            json={"newestItemId": newest_item_id},
+        )
+
+    # --- Feeds ---
+
+    async def get_feeds(self) -> dict[str, Any]:
+        """Get all feeds with metadata.
+
+        Returns:
+            Dict with keys:
+                - feeds: List of feed objects
+                - starredCount: Number of starred items
+                - newestItemId: ID of newest item (omitted if no items)
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/feeds")
+        return response.json()
+
+    async def create_feed(
+        self, url: str, folder_id: int | None = None
+    ) -> dict[str, Any]:
+        """Subscribe to a new feed.
+
+        Args:
+            url: Feed URL
+            folder_id: Optional folder ID (None for root); only sent when given
+
+        Returns:
+            Created feed data (first entry of the returned ``feeds`` list),
+            or an empty dict if the response contains no feed
+
+        Raises:
+            HTTPStatusError: 409 if feed already exists, 422 if URL is invalid
+        """
+        body: dict[str, Any] = {"url": url}
+        if folder_id is not None:
+            body["folderId"] = folder_id
+        response = await self._make_request("POST", f"{self.API_BASE}/feeds", json=body)
+        data = response.json()
+        feeds = data.get("feeds", [])
+        return feeds[0] if feeds else {}
+
+    async def delete_feed(self, feed_id: int) -> None:
+        """Unsubscribe from a feed (deletes all items).
+
+        Args:
+            feed_id: Feed ID
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request("DELETE", f"{self.API_BASE}/feeds/{feed_id}")
+
+    async def move_feed(self, feed_id: int, folder_id: int | None) -> None:
+        """Move a feed to a different folder.
+
+        Args:
+            feed_id: Feed ID
+            folder_id: Target folder ID (None for root; sent as JSON null)
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/feeds/{feed_id}/move",
+            json={"folderId": folder_id},
+        )
+
+    async def rename_feed(self, feed_id: int, title: str) -> None:
+        """Rename a feed.
+
+        Args:
+            feed_id: Feed ID
+            title: New feed title
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/feeds/{feed_id}/rename",
+            json={"feedTitle": title},
+        )
+
+    async def mark_feed_read(self, feed_id: int, newest_item_id: int) -> None:
+        """Mark all items in a feed as read.
+
+        Args:
+            feed_id: Feed ID
+            newest_item_id: ID of newest item to mark read
+
+        Raises:
+            HTTPStatusError: 404 if feed not found
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/feeds/{feed_id}/read",
+            json={"newestItemId": newest_item_id},
+        )
+
+    # --- Items ---
+
+    async def get_items(
+        self,
+        batch_size: int = 50,
+        offset: int = 0,
+        type_: int = NewsItemType.ALL,
+        id_: int = 0,
+        get_read: bool = True,
+        oldest_first: bool = False,
+    ) -> list[dict[str, Any]]:
+        """Get items (articles) with filtering.
+
+        Args:
+            batch_size: Number of items to return (-1 for all)
+            offset: Item ID to start after (for pagination)
+            type_: Item type filter (NewsItemType or its integer value)
+            id_: Feed/folder ID (ignored for STARRED/ALL types)
+            get_read: Include read items
+            oldest_first: Sort oldest first instead of newest
+
+        Returns:
+            The ``items`` list of the response, or an empty list if absent
+        """
+        params: dict[str, Any] = {
+            "batchSize": batch_size,
+            "offset": offset,
+            # int() guarantees the numeric value is sent: str() of an IntEnum
+            # member is its name ("NewsItemType.ALL") on Python < 3.11.
+            "type": int(type_),
+            "id": id_,
+            "getRead": str(get_read).lower(),
+            "oldestFirst": str(oldest_first).lower(),
+        }
+        response = await self._make_request(
+            "GET", f"{self.API_BASE}/items", params=params
+        )
+        return response.json().get("items", [])
+
+    async def get_item(self, item_id: int) -> dict[str, Any]:
+        """Get a specific item by ID.
+
+        Args:
+            item_id: Item ID
+
+        Returns:
+            Item data (the decoded JSON response body)
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/items/{item_id}")
+        return response.json()
+
+    async def get_updated_items(
+        self,
+        last_modified: int,
+        type_: int = NewsItemType.ALL,
+        id_: int = 0,
+    ) -> list[dict[str, Any]]:
+        """Get items modified since a timestamp (for delta sync).
+
+        Args:
+            last_modified: Unix timestamp (seconds or microseconds)
+            type_: Item type filter (NewsItemType or its integer value)
+            id_: Feed/folder ID
+
+        Returns:
+            List of modified items (includes deleted items)
+        """
+        params: dict[str, Any] = {
+            "lastModified": last_modified,
+            # See get_items: force the numeric form of an IntEnum member.
+            "type": int(type_),
+            "id": id_,
+        }
+        response = await self._make_request(
+            "GET", f"{self.API_BASE}/items/updated", params=params
+        )
+        return response.json().get("items", [])
+
+    async def mark_item_read(self, item_id: int) -> None:
+        """Mark a single item as read.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/read")
+
+    async def mark_item_unread(self, item_id: int) -> None:
+        """Mark a single item as unread.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unread")
+
+    async def star_item(self, item_id: int) -> None:
+        """Star (favorite) a single item.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/star")
+
+    async def unstar_item(self, item_id: int) -> None:
+        """Unstar a single item.
+
+        Args:
+            item_id: Item ID
+
+        Raises:
+            HTTPStatusError: 404 if item not found
+        """
+        await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unstar")
+
+    async def mark_items_read(self, item_ids: list[int]) -> None:
+        """Mark multiple items as read.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST", f"{self.API_BASE}/items/read/multiple", json={"itemIds": item_ids}
+        )
+
+    async def mark_items_unread(self, item_ids: list[int]) -> None:
+        """Mark multiple items as unread.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/items/unread/multiple",
+            json={"itemIds": item_ids},
+        )
+
+    async def star_items(self, item_ids: list[int]) -> None:
+        """Star multiple items.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST", f"{self.API_BASE}/items/star/multiple", json={"itemIds": item_ids}
+        )
+
+    async def unstar_items(self, item_ids: list[int]) -> None:
+        """Unstar multiple items.
+
+        Args:
+            item_ids: List of item IDs
+        """
+        await self._make_request(
+            "POST",
+            f"{self.API_BASE}/items/unstar/multiple",
+            json={"itemIds": item_ids},
+        )
+
+    async def mark_all_read(self, newest_item_id: int) -> None:
+        """Mark all items as read.
+
+        Args:
+            newest_item_id: ID of newest item to mark read
+        """
+        await self._make_request(
+            "POST", f"{self.API_BASE}/items/read", json={"newestItemId": newest_item_id}
+        )
+
+    # --- Status ---
+
+    async def get_status(self) -> dict[str, Any]:
+        """Get News app status and configuration.
+
+        Returns:
+            Dict with version and warnings
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/status")
+        return response.json()
+
+    async def get_version(self) -> str:
+        """Get News app version.
+
+        Returns:
+            Version string (e.g., "25.0.0"), or "" if the response has no
+            ``version`` key
+        """
+        response = await self._make_request("GET", f"{self.API_BASE}/version")
+        return response.json().get("version", "")
@@ -18,18 +18,57 @@ class NotesClient(BaseNextcloudClient):
        response = await self._make_request("GET", "/apps/notes/api/v1/settings")
        return response.json()

-    async def get_all_notes(self) -> AsyncIterator[Dict[str, Any]]:
-        """Get all notes, yielding them one at a time."""
+    async def get_all_notes(
+        self, prune_before: Optional[int] = None
+    ) -> AsyncIterator[Dict[str, Any]]:
+        """Get all notes, yielding them one at a time.
+
+        The Notes API returns changed notes with full data in chunks, and ALL note IDs
+        (with only 'id' field) in the last chunk for deletion detection. This causes
+        duplicates which we handle by tracking seen IDs (first occurrence with full
+        data is kept, later pruned duplicates are skipped).
+
+        Chunks of up to 100 notes are requested until the response no longer
+        carries an 'X-Notes-Chunk-Cursor' header.
+
+        Args:
+            prune_before: Optional Unix timestamp. Notes unchanged since this time
+                         are pruned (only 'id' field returned in last chunk).
+                         Reduces data transfer for large note collections.
+
+        Yields:
+            One dictionary per distinct note ID (first occurrence wins). Entries
+            without an 'id' are skipped with a warning.
+            NOTE(review): an ID-only (pruned) entry whose ID was not seen earlier
+            is yielded as-is, so callers passing prune_before may receive
+            dictionaries without full note data - confirm this is intended.
+        """
        cursor = ""
+        seen_ids: set[int] = set()

        while True:
+            params: Dict[str, Any] = {"chunkSize": 100}
+            if cursor:
+                params["chunkCursor"] = cursor
+            if prune_before is not None:
+                params["pruneBefore"] = prune_before
+
            response = await self._make_request(
                "GET",
                "/apps/notes/api/v1/notes",
-                params={"chunkSize": 10, "chunkCursor": cursor},
+                params=params,
            )
-            for note in response.json():
+            response_data = response.json()
+
+            for note in response_data:
+                note_id = note.get("id")
+                if note_id is None:
+                    logger.warning(f"Skipping note without ID: {note}")
+                    continue
+
+                # Skip duplicates (API returns all IDs in last chunk for deletion detection)
+                if note_id in seen_ids:
+                    logger.debug(
+                        f"Skipping duplicate note {note_id} (pruned version in last chunk)"
+                    )
+                    continue
+
+                seen_ids.add(note_id)
                yield note
+
            if "X-Notes-Chunk-Cursor" not in response.headers:
                break
            cursor = response.headers["X-Notes-Chunk-Cursor"]
@@ -821,6 +821,20 @@ class WebDAVClient(BaseNextcloudClient):
                    item["file_id"] = int(value) if value else None
                elif tag == "favorite":
                    item["is_favorite"] = value == "1"
+                elif tag == "tags":
+                    # Tags can be comma-separated or have multiple child elements
+                    if value:
+                        # Handle comma-separated tags
+                        item["tags"] = [
+                            t.strip() for t in value.split(",") if t.strip()
+                        ]
+                    else:
+                        # Check for child tag elements (alternative format)
+                        tag_elements = child.findall(".//{http://owncloud.org/ns}tag")
+                        if tag_elements:
+                            item["tags"] = [t.text for t in tag_elements if t.text]
+                        else:
+                            item["tags"] = []
                elif tag == "permissions":
                    item["permissions"] = value
                elif tag == "size":
@@ -948,3 +962,574 @@ class WebDAVClient(BaseNextcloudClient):
            properties=properties,
            limit=limit,
        )
+
+    async def find_by_tag(
+        self, tag_name: str, scope: str = "", limit: Optional[int] = None
+    ) -> List[Dict[str, Any]]:
+        """Find files by tag name via a WebDAV SEARCH on the ``oc:tags`` property.
+
+        DEPRECATED: Use NextcloudClient.find_files_by_tag() instead, which looks
+        up the system tag by name and lists its files via WebDAV rather than
+        pattern-matching with WebDAV SEARCH.
+
+        The condition is ``LIKE '%<tag_name>%'``, so (presumably, depending on
+        the server's LIKE semantics) any tag value containing ``tag_name`` as a
+        substring matches, not only the exact tag.
+
+        Args:
+            tag_name: Tag to filter by (e.g., "vector-index"). XML special
+                characters are escaped before being placed in the request.
+            scope: Directory path to search in (empty string for user root)
+            limit: Maximum number of results to return
+
+        Returns:
+            List of files/directories with the specified tag, as returned by
+            ``self.search_files``
+
+        Examples:
+            # Find all files tagged with "vector-index"
+            results = await find_by_tag("vector-index")
+
+            # Find tagged files in a specific folder
+            results = await find_by_tag("vector-index", scope="Documents")
+        """
+        from xml.sax.saxutils import escape
+
+        # Escape &, < and > so a tag name cannot produce malformed XML or
+        # inject extra elements into the SEARCH body.
+        safe_tag_name = escape(tag_name)
+
+        # Use LIKE for tag matching since tags can be comma-separated
+        where_conditions = f"""
+            <d:like>
+                <d:prop>
+                    <oc:tags/>
+                </d:prop>
+                <d:literal>%{safe_tag_name}%</d:literal>
+            </d:like>
+        """
+
+        # Request tag property along with standard properties
+        properties = [
+            "displayname",
+            "getcontentlength",
+            "getcontenttype",
+            "getlastmodified",
+            "resourcetype",
+            "getetag",
+            "fileid",
+            "tags",
+        ]
+
+        return await self.search_files(
+            scope=scope,
+            where_conditions=where_conditions,
+            properties=properties,
+            limit=limit,
+        )
+
+    async def _get_file_info_by_id(self, file_id: int) -> Dict[str, Any]:
+        """Get file information by Nextcloud file ID using WebDAV.
+
+        Sends a Depth-0 PROPFIND to ``/remote.php/dav/meta/{file_id}/`` and reads
+        the properties of the first ``<d:response>`` element in the reply.
+
+        Args:
+            file_id: Nextcloud internal file ID
+
+        Returns:
+            Dictionary with keys ``name``, ``path``, ``size``, ``content_type``,
+            ``last_modified``, ``etag``, ``is_directory`` and ``file_id``.
+            ``path`` is only a placeholder ("/<name>"); the meta endpoint does
+            not expose the file's real location.
+
+        Raises:
+            HTTPStatusError: If file not found or request fails
+            RuntimeError: If the reply lacks a response, href, propstat or
+                prop element
+        """
+        # Nextcloud allows accessing files by ID via special meta endpoint
+        meta_path = f"/remote.php/dav/meta/{file_id}/"
+
+        propfind_body = """<?xml version="1.0"?>
+        <d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
+            <d:prop>
+                <d:displayname/>
+                <d:getcontentlength/>
+                <d:getcontenttype/>
+                <d:getlastmodified/>
+                <d:resourcetype/>
+                <d:getetag/>
+                <oc:fileid/>
+            </d:prop>
+        </d:propfind>"""
+
+        headers = {"Depth": "0", "Content-Type": "text/xml", "OCS-APIRequest": "true"}
+
+        response = await self._make_request(
+            "PROPFIND", meta_path, content=propfind_body, headers=headers
+        )
+        response.raise_for_status()
+
+        # Parse the XML response
+        root = ET.fromstring(response.content)
+        responses = root.findall(".//{DAV:}response")
+
+        if not responses:
+            raise RuntimeError(f"File ID {file_id} not found")
+
+        response_elem = responses[0]
+        # The href value itself is not used; its absence marks a malformed reply.
+        href = response_elem.find(".//{DAV:}href")
+        if href is None:
+            raise RuntimeError(f"No href in response for file ID {file_id}")
+
+        propstat = response_elem.find(".//{DAV:}propstat")
+        if propstat is None:
+            raise RuntimeError(f"No propstat for file ID {file_id}")
+
+        prop = propstat.find(".//{DAV:}prop")
+        if prop is None:
+            raise RuntimeError(f"No prop for file ID {file_id}")
+
+        # Extract file name from displayname or construct from file ID.
+        # An empty <d:displayname/> has text None; fall back in that case too,
+        # otherwise "path" below would become "/None".
+        displayname_elem = prop.find(".//{DAV:}displayname")
+        name = (
+            displayname_elem.text
+            if displayname_elem is not None and displayname_elem.text
+            else f"file_{file_id}"
+        )
+
+        # Get file properties
+        size_elem = prop.find(".//{DAV:}getcontentlength")
+        size = int(size_elem.text) if size_elem is not None and size_elem.text else 0
+
+        content_type_elem = prop.find(".//{DAV:}getcontenttype")
+        content_type = content_type_elem.text if content_type_elem is not None else None
+
+        modified_elem = prop.find(".//{DAV:}getlastmodified")
+        modified = modified_elem.text if modified_elem is not None else None
+
+        # ETag is returned without its surrounding double quotes
+        etag_elem = prop.find(".//{DAV:}getetag")
+        etag = (
+            etag_elem.text.strip('"')
+            if etag_elem is not None and etag_elem.text
+            else None
+        )
+
+        # Check if it's a directory
+        resourcetype = prop.find(".//{DAV:}resourcetype")
+        is_directory = (
+            resourcetype is not None
+            and resourcetype.find(".//{DAV:}collection") is not None
+        )
+
+        # Try to get actual file path - meta endpoint doesn't give us the real path
+        # so we'll construct a reasonable path from the name
+        # The calling code in NextcloudClient will have the context to determine the actual path
+        file_info = {
+            "name": name,
+            "path": f"/{name}",  # Placeholder - caller should use WebDAV to get real path if needed
+            "size": size,
+            "content_type": content_type,
+            "last_modified": modified,
+            "etag": etag,
+            "is_directory": is_directory,
+            "file_id": file_id,
+        }
+
+        logger.debug(f"Retrieved file info for ID {file_id}: {name}")
+        return file_info
+
+    async def get_tag_by_name(self, tag_name: str) -> dict[str, Any] | None:
+        """Get a system tag by its name via WebDAV.
+
+        Lists ``/remote.php/dav/systemtags/`` with a Depth-1 PROPFIND and returns
+        the first entry whose ``oc:display-name`` equals ``tag_name``.
+
+        Args:
+            tag_name: Name of the tag to find (case-sensitive)
+
+        Returns:
+            Tag dictionary with keys ``id`` (int, or None if the server sent no
+            ``oc:id``), ``name``, ``userVisible`` and ``userAssignable`` if
+            found, None otherwise. The two flags default to True when the
+            corresponding element is missing or empty.
+
+        Raises:
+            HTTPStatusError: If the PROPFIND request returns an error status
+        """
+
+        def _parse_flag(elem) -> bool:
+            # A missing element and an empty element (text is None) both get
+            # the permissive default instead of raising AttributeError.
+            if elem is None or elem.text is None:
+                return True
+            return elem.text.lower() == "true"
+
+        # Use WebDAV PROPFIND to list all systemtags
+        propfind_body = """<?xml version="1.0"?>
+<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
+  <d:prop>
+    <oc:id/>
+    <oc:display-name/>
+    <oc:user-visible/>
+    <oc:user-assignable/>
+  </d:prop>
+</d:propfind>"""
+
+        response = await self._client.request(
+            "PROPFIND",
+            "/remote.php/dav/systemtags/",
+            headers={"Depth": "1"},
+            content=propfind_body,
+        )
+        response.raise_for_status()
+
+        # Parse XML response
+        root = ET.fromstring(response.content)
+        ns = {
+            "d": "DAV:",
+            "oc": "http://owncloud.org/ns",
+        }
+
+        for response_elem in root.findall("d:response", ns):
+            href = response_elem.find("d:href", ns)
+            if href is None or href.text == "/remote.php/dav/systemtags/":
+                # Skip the collection itself
+                continue
+
+            propstat = response_elem.find("d:propstat", ns)
+            if propstat is None:
+                continue
+
+            prop = propstat.find("d:prop", ns)
+            if prop is None:
+                continue
+
+            # Extract tag properties
+            tag_id_elem = prop.find("oc:id", ns)
+            display_name_elem = prop.find("oc:display-name", ns)
+            user_visible_elem = prop.find("oc:user-visible", ns)
+            user_assignable_elem = prop.find("oc:user-assignable", ns)
+
+            if display_name_elem is not None and display_name_elem.text == tag_name:
+                tag_info = {
+                    "id": int(tag_id_elem.text)
+                    if tag_id_elem is not None and tag_id_elem.text is not None
+                    else None,
+                    "name": display_name_elem.text,
+                    "userVisible": _parse_flag(user_visible_elem),
+                    "userAssignable": _parse_flag(user_assignable_elem),
+                }
+                logger.debug(f"Found tag '{tag_name}' with ID {tag_info['id']}")
+                return tag_info
+
+        logger.debug(f"Tag '{tag_name}' not found")
+        return None
+
+    async def get_files_by_tag(self, tag_id: int) -> list[dict[str, Any]]:
+        """Get all files tagged with a specific system tag via WebDAV REPORT.
+
+        Sends an ``oc:filter-files`` REPORT with an ``oc:systemtag`` rule to the
+        user's WebDAV root and converts each ``<d:response>`` into a dictionary.
+        Responses without an href, propstat, prop or file ID are skipped.
+
+        Args:
+            tag_id: Numeric ID of the tag
+
+        Returns:
+            List of dictionaries with keys ``id``, ``path`` (URL-decoded,
+            relative to the user's files root), ``name``, ``size``,
+            ``content_type`` (always a string, "" if unknown),
+            ``last_modified`` (raw header text), ``last_modified_timestamp``
+            (Unix seconds, or None if missing/unparseable) and ``etag``.
+
+        Raises:
+            HTTPStatusError: If the REPORT request returns an error status
+        """
+        from email.utils import parsedate_to_datetime
+        from urllib.parse import unquote
+
+        # Use WebDAV REPORT method with systemtag filter, requesting all properties
+        report_body = f"""<?xml version="1.0"?>
+<oc:filter-files xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns" xmlns:nc="http://nextcloud.org/ns">
+  <d:prop>
+    <oc:fileid/>
+    <d:displayname/>
+    <d:getcontentlength/>
+    <d:getcontenttype/>
+    <d:getlastmodified/>
+    <d:getetag/>
+  </d:prop>
+  <oc:filter-rules>
+    <oc:systemtag>{tag_id}</oc:systemtag>
+  </oc:filter-rules>
+</oc:filter-files>"""
+
+        response = await self._client.request(
+            "REPORT",
+            f"{self._get_webdav_base_path()}/",
+            content=report_body,
+        )
+        response.raise_for_status()
+
+        # Parse XML response
+        root = ET.fromstring(response.content)
+        ns = {
+            "d": "DAV:",
+            "oc": "http://owncloud.org/ns",
+        }
+
+        # Loop-invariant: prefix that precedes the user-relative path in hrefs
+        webdav_prefix = f"/remote.php/dav/files/{self.username}/"
+
+        files = []
+        for response_elem in root.findall("d:response", ns):
+            # Extract href (file path)
+            href_elem = response_elem.find("d:href", ns)
+            if href_elem is None or not href_elem.text:
+                continue
+
+            propstat = response_elem.find("d:propstat", ns)
+            if propstat is None:
+                continue
+
+            prop = propstat.find("d:prop", ns)
+            if prop is None:
+                continue
+
+            # Extract all properties
+            fileid_elem = prop.find("oc:fileid", ns)
+            displayname_elem = prop.find("d:displayname", ns)
+            contentlength_elem = prop.find("d:getcontentlength", ns)
+            contenttype_elem = prop.find("d:getcontenttype", ns)
+            lastmodified_elem = prop.find("d:getlastmodified", ns)
+            etag_elem = prop.find("d:getetag", ns)
+
+            if fileid_elem is None or not fileid_elem.text:
+                continue
+
+            # Decode href path and cut everything up to and including the first
+            # WebDAV prefix. Unlike str.replace this also drops a leading
+            # install subdirectory (e.g. "/nextcloud") and leaves a later
+            # occurrence of the prefix inside the file path untouched.
+            href_path = unquote(href_elem.text)
+            prefix_at = href_path.find(webdav_prefix)
+            if prefix_at != -1:
+                file_path = "/" + href_path[prefix_at + len(webdav_prefix) :]
+            else:
+                file_path = href_path
+
+            # Parse last modified timestamp (best effort: stays None on failure)
+            last_modified_timestamp = None
+            if lastmodified_elem is not None and lastmodified_elem.text:
+                try:
+                    dt = parsedate_to_datetime(lastmodified_elem.text)
+                    last_modified_timestamp = int(dt.timestamp())
+                except (TypeError, ValueError, OverflowError, OSError) as exc:
+                    logger.debug(
+                        f"Could not parse last-modified '{lastmodified_elem.text}' "
+                        f"for {file_path}: {exc}"
+                    )
+
+            # Empty elements have text None; fall back so "name" and
+            # "content_type" are always strings.
+            name = displayname_elem.text if displayname_elem is not None else None
+            if not name:
+                name = file_path.split("/")[-1]
+            content_type = (
+                contenttype_elem.text if contenttype_elem is not None else None
+            ) or ""
+
+            file_info = {
+                "id": int(fileid_elem.text),
+                "path": file_path,
+                "name": name,
+                "size": int(contentlength_elem.text)
+                if contentlength_elem is not None and contentlength_elem.text
+                else 0,
+                "content_type": content_type,
+                "last_modified": lastmodified_elem.text
+                if lastmodified_elem is not None
+                else None,
+                "last_modified_timestamp": last_modified_timestamp,
+                "etag": etag_elem.text if etag_elem is not None else None,
+            }
+            files.append(file_info)
+
+        logger.debug(f"Found {len(files)} files with tag ID {tag_id}")
+        return files
+
+    async def get_file_info(self, path: str) -> dict[str, Any] | None:
+        """Get file info including file ID via WebDAV PROPFIND.
+
+        Sends a ``Depth: 0`` PROPFIND for the path and reads the properties
+        from the first ``propstat`` of the first ``response`` element.
+
+        Args:
+            path: Path to the file (relative to user's files directory)
+
+        Returns:
+            File info dictionary with id, path, name, size, content_type,
+            last_modified, etag (surrounding quotes stripped) and is_directory.
+            ``name`` falls back to the last non-empty path segment when the
+            server reports no display name.
+            Returns None if the server answers 404 or the response lacks a
+            ``response``/``propstat``/``prop`` element.
+
+        Raises:
+            HTTPStatusError: For any error status other than 404.
+        """
+        webdav_path = f"{self._get_webdav_base_path()}/{path.lstrip('/')}"
+
+        propfind_body = """<?xml version="1.0"?>
+<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
+  <d:prop>
+    <oc:fileid/>
+    <d:displayname/>
+    <d:getcontentlength/>
+    <d:getcontenttype/>
+    <d:getlastmodified/>
+    <d:getetag/>
+    <d:resourcetype/>
+  </d:prop>
+</d:propfind>"""
+
+        try:
+            response = await self._client.request(
+                "PROPFIND",
+                webdav_path,
+                headers={"Depth": "0"},
+                content=propfind_body,
+            )
+            response.raise_for_status()
+        except HTTPStatusError as e:
+            # A missing file is an expected outcome, not an error.
+            if e.response.status_code == 404:
+                logger.debug(f"File not found: {path}")
+                return None
+            raise
+
+        # Parse XML response
+        root = ET.fromstring(response.content)
+        ns = {
+            "d": "DAV:",
+            "oc": "http://owncloud.org/ns",
+        }
+
+        response_elem = root.find("d:response", ns)
+        if response_elem is None:
+            return None
+
+        propstat = response_elem.find("d:propstat", ns)
+        if propstat is None:
+            return None
+
+        prop = propstat.find("d:prop", ns)
+        if prop is None:
+            return None
+
+        # Extract properties
+        fileid_elem = prop.find("oc:fileid", ns)
+        displayname_elem = prop.find("d:displayname", ns)
+        contentlength_elem = prop.find("d:getcontentlength", ns)
+        contenttype_elem = prop.find("d:getcontenttype", ns)
+        lastmodified_elem = prop.find("d:getlastmodified", ns)
+        etag_elem = prop.find("d:getetag", ns)
+        resourcetype_elem = prop.find("d:resourcetype", ns)
+
+        is_directory = (
+            resourcetype_elem is not None
+            and resourcetype_elem.find("d:collection", ns) is not None
+        )
+
+        # An empty <d:displayname/> parses to text=None, so treat it like a
+        # missing element. The fallback ignores a trailing slash so directory
+        # paths such as "docs/" yield "docs" rather than an empty name.
+        if displayname_elem is not None and displayname_elem.text:
+            name = displayname_elem.text
+        else:
+            name = path.rstrip("/").split("/")[-1]
+
+        file_info = {
+            "id": int(fileid_elem.text)
+            if fileid_elem is not None and fileid_elem.text is not None
+            else None,
+            "path": path,
+            "name": name,
+            "size": int(contentlength_elem.text)
+            if contentlength_elem is not None and contentlength_elem.text
+            else 0,
+            "content_type": contenttype_elem.text
+            if contenttype_elem is not None
+            else "",
+            "last_modified": lastmodified_elem.text
+            if lastmodified_elem is not None
+            else None,
+            "etag": etag_elem.text.strip('"')
+            if etag_elem is not None and etag_elem.text
+            else None,
+            "is_directory": is_directory,
+        }
+
+        logger.debug(f"Got file info for '{path}': id={file_info['id']}")
+        return file_info
+
+    async def create_tag(
+        self,
+        name: str,
+        user_visible: bool = True,
+        user_assignable: bool = True,
+    ) -> dict[str, Any]:
+        """Create a system tag via WebDAV.
+
+        Args:
+            name: Name of the tag to create
+            user_visible: Whether the tag is visible to users
+            user_assignable: Whether users can assign this tag
+
+        Returns:
+            Tag dictionary with id, name, userVisible, userAssignable.
+            ``id`` is None when the response carries no parsable
+            Content-Location header; a warning is logged in that case.
+
+        Raises:
+            HTTPStatusError: If tag creation fails (409 if already exists)
+        """
+        # Use WebDAV POST with JSON body to create tag
+        response = await self._client.post(
+            "/remote.php/dav/systemtags/",
+            headers={"Content-Type": "application/json"},
+            json={
+                "name": name,
+                "userVisible": user_visible,
+                "userAssignable": user_assignable,
+            },
+        )
+        response.raise_for_status()
+
+        # Extract tag ID from Content-Location header (e.g., /remote.php/dav/systemtags/42)
+        content_location = response.headers.get("Content-Location", "")
+        tag_id = None
+        if content_location:
+            # The numeric ID is the last path segment. str.split always returns
+            # at least one element, so only int() can fail here.
+            try:
+                tag_id = int(content_location.rstrip("/").split("/")[-1])
+            except ValueError:
+                logger.warning(
+                    f"Could not parse tag ID from Content-Location "
+                    f"'{content_location}' for tag '{name}'"
+                )
+        else:
+            # Still return the tag (best effort), but make the missing ID visible.
+            logger.warning(
+                f"No Content-Location header in response for tag '{name}'; "
+                f"tag ID is unknown"
+            )
+
+        tag_info = {
+            "id": tag_id,
+            "name": name,
+            "userVisible": user_visible,
+            "userAssignable": user_assignable,
+        }
+
+        logger.info(f"Created tag '{name}' with ID {tag_info['id']}")
+        return tag_info
+
+    async def get_or_create_tag(
+        self,
+        name: str,
+        user_visible: bool = True,
+        user_assignable: bool = True,
+    ) -> dict[str, Any]:
+        """Return the tag called ``name``, creating it when it is absent.
+
+        Args:
+            name: Name of the tag
+            user_visible: Visibility flag, only used if the tag is created
+            user_assignable: Assignability flag, only used if the tag is created
+
+        Returns:
+            Tag dictionary with id, name, userVisible, userAssignable
+
+        Raises:
+            HTTPStatusError: If creation fails, including a 409 after which
+                the tag still cannot be looked up by name.
+        """
+        # Fast path: the tag is already there.
+        found = await self.get_tag_by_name(name)
+        if found:
+            logger.debug(f"Tag '{name}' already exists with ID {found['id']}")
+            return found
+
+        try:
+            created = await self.create_tag(name, user_visible, user_assignable)
+        except HTTPStatusError as exc:
+            if exc.response.status_code != 409:
+                raise
+            # 409: the tag appeared between the lookup and the create call,
+            # so look it up again and hand that one back.
+            raced = await self.get_tag_by_name(name)
+            if not raced:
+                raise
+            return raced
+        return created
+
+    async def assign_tag_to_file(self, file_id: int, tag_id: int) -> bool:
+        """Attach a system tag to a file through the systemtags-relations endpoint.
+
+        Args:
+            file_id: Numeric file ID
+            tag_id: Numeric tag ID
+
+        Returns:
+            True once the tag is assigned (201) or was already assigned (409)
+
+        Raises:
+            HTTPStatusError: If the server answers with any other error status
+        """
+        relation_url = f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}"
+        resp = await self._client.request(
+            "PUT",
+            relation_url,
+            headers={"Content-Length": "0"},
+            content=b"",
+        )
+
+        # Only 201 (created) and 409 (already assigned) are logged as tagged;
+        # every other status goes through raise_for_status().
+        if resp.status_code not in (201, 409):
+            resp.raise_for_status()
+            return True
+
+        logger.info(f"Tagged file {file_id} with tag {tag_id}")
+        return True
+
+    async def remove_tag_from_file(self, file_id: int, tag_id: int) -> bool:
+        """Detach a system tag from a file through the systemtags-relations endpoint.
+
+        Args:
+            file_id: Numeric file ID
+            tag_id: Numeric tag ID
+
+        Returns:
+            True once the tag is removed (204) or was not assigned at all (404)
+
+        Raises:
+            HTTPStatusError: If the server answers with any other error status
+        """
+        relation_url = f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}"
+        resp = await self._client.request("DELETE", relation_url)
+
+        # Only 204 (removed) and 404 (was never assigned) are logged as removed;
+        # every other status goes through raise_for_status().
+        if resp.status_code not in (204, 404):
+            resp.raise_for_status()
+            return True
+
+        logger.info(f"Removed tag {tag_id} from file {file_id}")
+        return True
@@ -0,0 +1,109 @@
+"""Client for Nextcloud Webhook Listeners API operations."""
+
+from typing import Any, Dict, List, Optional
+
+from nextcloud_mcp_server.client.base import BaseNextcloudClient
+
+
+class WebhooksClient(BaseNextcloudClient):
+    """Async wrapper around the Nextcloud ``webhook_listeners`` OCS API."""
+
+    app_name = "webhooks"
+
+    # OCS route shared by every webhook operation in this client.
+    _WEBHOOKS_ENDPOINT = "/ocs/v2.php/apps/webhook_listeners/api/v1/webhooks"
+
+    def _get_webhook_headers(
+        self, additional_headers: Optional[Dict[str, str]] = None
+    ) -> Dict[str, str]:
+        """Build the request headers for Webhook Listeners API calls.
+
+        Entries in ``additional_headers`` override the defaults on key clash.
+        """
+        return {
+            "OCS-APIRequest": "true",
+            "Accept": "application/json",
+            **(additional_headers or {}),
+        }
+
+    async def list_webhooks(self) -> List[Dict[str, Any]]:
+        """Fetch the webhook registrations returned by the listing endpoint.
+
+        Returns:
+            The ``ocs.data`` payload when it is a list, otherwise an empty list.
+        """
+        response = await self._make_request(
+            "GET",
+            self._WEBHOOKS_ENDPOINT,
+            headers=self._get_webhook_headers(),
+        )
+        payload = response.json()["ocs"]["data"]
+        if isinstance(payload, list):
+            return payload
+        return []
+
+    async def create_webhook(
+        self,
+        event: str,
+        uri: str,
+        http_method: str = "POST",
+        auth_method: str = "none",
+        headers: Optional[Dict[str, str]] = None,
+        event_filter: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        """Register a webhook for the given event.
+
+        Args:
+            event: Fully qualified event class name (e.g., "OCP\\Files\\Events\\Node\\NodeCreatedEvent")
+            uri: Endpoint URL that should receive the event notifications
+            http_method: HTTP method used for delivery (default: "POST")
+            auth_method: Authentication method ("none", "bearer", etc.)
+            headers: Custom headers for webhook requests; omitted from the
+                request body when empty or None
+            event_filter: Event filter object (e.g., {"user.uid": "bob"});
+                omitted from the request body when empty or None
+
+        Returns:
+            The ``ocs.data`` payload of the response
+        """
+        payload: Dict[str, Any] = {
+            "httpMethod": http_method,
+            "uri": uri,
+            "event": event,
+            "authMethod": auth_method,
+        }
+        # Optional keys are only sent when they carry a value.
+        for key, value in (("headers", headers), ("eventFilter", event_filter)):
+            if value:
+                payload[key] = value
+
+        response = await self._make_request(
+            "POST",
+            self._WEBHOOKS_ENDPOINT,
+            json=payload,
+            headers=self._get_webhook_headers(),
+        )
+        return response.json()["ocs"]["data"]
+
+    async def delete_webhook(self, webhook_id: int) -> None:
+        """Delete the webhook registration with the given ID.
+
+        Args:
+            webhook_id: ID of the webhook to delete
+        """
+        await self._make_request(
+            "DELETE",
+            f"{self._WEBHOOKS_ENDPOINT}/{webhook_id}",
+            headers=self._get_webhook_headers(),
+        )
+
+    async def get_webhook(self, webhook_id: int) -> Dict[str, Any]:
+        """Fetch a single webhook registration.
+
+        Args:
+            webhook_id: ID of the webhook to retrieve
+
+        Returns:
+            The ``ocs.data`` payload of the response
+        """
+        response = await self._make_request(
+            "GET",
+            f"{self._WEBHOOKS_ENDPOINT}/{webhook_id}",
+            headers=self._get_webhook_headers(),
+        )
+        return response.json()["ocs"]["data"]
@@ -2,8 +2,37 @@ import logging
 import logging.config
 import os
 from dataclasses import dataclass
+from enum import Enum
 from typing import Any, Optional

+
+class DeploymentMode(Enum):
+    """How the MCP server is deployed.
+
+    Members:
+        SELF_HOSTED: Environment-configured deployment with the full feature
+            set (vector sync, semantic search, admin UI).
+        SMITHERY_STATELESS: Smithery-hosted deployment. Configuration comes
+            from the session and nothing is persisted, so semantic search,
+            vector sync and the admin UI are excluded.
+    """
+
+    SELF_HOSTED = "self_hosted"
+    SMITHERY_STATELESS = "smithery"
+
+
+def get_deployment_mode() -> DeploymentMode:
+    """Work out the deployment mode from the SMITHERY_DEPLOYMENT variable.
+
+    Returns:
+        DeploymentMode.SMITHERY_STATELESS when SMITHERY_DEPLOYMENT equals
+        "true" (case-insensitive); DeploymentMode.SELF_HOSTED otherwise,
+        including when the variable is unset.
+    """
+    flag = os.getenv("SMITHERY_DEPLOYMENT", "false")
+    smithery_requested = flag.lower() == "true"
+    return (
+        DeploymentMode.SMITHERY_STATELESS
+        if smithery_requested
+        else DeploymentMode.SELF_HOSTED
+    )
+
+
 LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
@@ -102,6 +131,14 @@ def get_document_processor_config() -> dict[str, Any]:
            "lang": os.getenv("TESSERACT_LANG", "eng"),
        }

+    # PyMuPDF configuration (local PDF processing)
+    if os.getenv("ENABLE_PYMUPDF", "true").lower() == "true":  # Enabled by default
+        config["processors"]["pymupdf"] = {
+            "extract_images": os.getenv("PYMUPDF_EXTRACT_IMAGES", "true").lower()
+            == "true",
+            "image_dir": os.getenv("PYMUPDF_IMAGE_DIR"),  # None = use temp directory
+        }
+
    # Custom processor (via HTTP API)
    if os.getenv("ENABLE_CUSTOM_PROCESSOR", "false").lower() == "true":
        custom_url = os.getenv("CUSTOM_PROCESSOR_URL")
@@ -153,7 +190,13 @@ class Settings:
    # Token exchange cache settings
    token_exchange_cache_ttl: int = 300  # seconds (5 minutes default)

-    # Token settings
+    # Token and webhook storage settings
+    # TOKEN_ENCRYPTION_KEY: Optional - Only required for OAuth token storage operations.
+    #                       Webhook tracking works without encryption key.
+    #                       If set, must be a valid base64-encoded Fernet key (32 bytes).
+    # TOKEN_STORAGE_DB: Path to SQLite database for persistent storage.
+    #                   Used for webhook tracking (all modes) and OAuth token storage.
+    #                   Defaults to /tmp/tokens.db
    token_encryption_key: Optional[str] = None
    token_storage_db: Optional[str] = None

@@ -174,19 +217,24 @@ class Settings:
    ollama_embedding_model: str = "nomic-embed-text"
    ollama_verify_ssl: bool = True

+    # OpenAI settings (for embeddings)
+    openai_api_key: Optional[str] = None
+    openai_base_url: Optional[str] = None
+    openai_embedding_model: str = "text-embedding-3-small"
+
    # Document chunking settings (for vector embeddings)
-    document_chunk_size: int = 512  # Words per chunk
-    document_chunk_overlap: int = 50  # Overlapping words between chunks
+    document_chunk_size: int = 2048  # Characters per chunk
+    document_chunk_overlap: int = 200  # Overlapping characters between chunks

    # Observability settings
    metrics_enabled: bool = True
    metrics_port: int = 9090
-    tracing_enabled: bool = False
    otel_exporter_otlp_endpoint: Optional[str] = None
+    otel_exporter_verify_ssl: bool = False
    otel_service_name: str = "nextcloud-mcp-server"
    otel_traces_sampler: str = "always_on"
    otel_traces_sampler_arg: float = 1.0
-    log_format: str = "json"  # "json" or "text"
+    log_format: str = "text"  # "json" or "text"
    log_level: str = "INFO"
    log_include_trace_context: bool = True

@@ -204,7 +252,7 @@ class Settings:
        # Default to :memory: if neither set
        if not self.qdrant_url and not self.qdrant_location:
            self.qdrant_location = ":memory:"
-            logger.info("Using default Qdrant mode: in-memory (:memory:)")
+            logger.debug("Using default Qdrant mode: in-memory (:memory:)")

        # Warn if API key set in local mode
        if self.qdrant_location and self.qdrant_api_key:
@@ -221,10 +269,10 @@ class Settings:
                f"Overlap should be 10-20% of chunk size for optimal results."
            )

-        if self.document_chunk_size < 100:
+        if self.document_chunk_size < 512:
            logger.warning(
-                f"DOCUMENT_CHUNK_SIZE is set to {self.document_chunk_size} words, which is quite small. "
-                f"Smaller chunks may lose context. Consider using at least 256 words."
+                f"DOCUMENT_CHUNK_SIZE is set to {self.document_chunk_size} characters, which is quite small. "
+                f"Smaller chunks may lose context. Consider using at least 1024 characters."
            )

        if self.document_chunk_overlap < 0:
@@ -232,6 +280,29 @@ class Settings:
                f"DOCUMENT_CHUNK_OVERLAP ({self.document_chunk_overlap}) cannot be negative."
            )

+    def get_embedding_model_name(self) -> str:
+        """
+        Resolve the name of the embedding model currently in effect.
+
+        Providers are checked in this order:
+        1. OpenAI - chosen when ``openai_api_key`` is set
+        2. Ollama - chosen when ``ollama_base_url`` is set
+        3. Simple - fallback, reported as "simple-384"
+
+        Returns:
+            Active embedding model name
+        """
+        if not self.openai_api_key:
+            # No OpenAI key: use Ollama if configured, else the simple fallback.
+            if not self.ollama_base_url:
+                return "simple-384"
+            return self.ollama_embedding_model
+
+        # OpenAI takes precedence whenever its API key is present.
+        return self.openai_embedding_model
+
    def get_collection_name(self) -> str:
        """
        Get Qdrant collection name.
@@ -247,8 +318,9 @@ class Settings:
        Format: {deployment-id}-{model-name}

        Examples:
-            - "my-deployment-nomic-embed-text" (OTEL_SERVICE_NAME set)
-            - "mcp-container-all-minilm" (hostname fallback)
+            - "my-deployment-nomic-embed-text" (Ollama)
+            - "my-deployment-text-embedding-3-small" (OpenAI)
+            - "mcp-container-openai-text-embedding-3-small" (hostname fallback)

        Returns:
            Collection name string
@@ -268,7 +340,7 @@ class Settings:

        # Sanitize deployment ID and model name
        deployment_id = deployment_id.lower().replace(" ", "-").replace("_", "-")
-        model_name = self.ollama_embedding_model.replace("/", "-").replace(":", "-")
+        model_name = self.get_embedding_model_name().replace("/", "-").replace(":", "-")

        return f"{deployment_id}-{model_name}"

@@ -282,8 +354,8 @@ def get_settings() -> Settings:
    return Settings(
        # OAuth/OIDC settings
        oidc_discovery_url=os.getenv("OIDC_DISCOVERY_URL"),
-        oidc_client_id=os.getenv("OIDC_CLIENT_ID"),
-        oidc_client_secret=os.getenv("OIDC_CLIENT_SECRET"),
+        oidc_client_id=os.getenv("NEXTCLOUD_OIDC_CLIENT_ID"),
+        oidc_client_secret=os.getenv("NEXTCLOUD_OIDC_CLIENT_SECRET"),
        oidc_issuer=os.getenv("OIDC_ISSUER"),
        # Nextcloud settings
        nextcloud_host=os.getenv("NEXTCLOUD_HOST"),
@@ -305,7 +377,7 @@ def get_settings() -> Settings:
        ),
        # Token exchange cache settings
        token_exchange_cache_ttl=int(os.getenv("TOKEN_EXCHANGE_CACHE_TTL", "300")),
-        # Token settings
+        # Token and webhook storage settings (encryption key optional for webhook-only usage)
        token_encryption_key=os.getenv("TOKEN_ENCRYPTION_KEY"),
        token_storage_db=os.getenv("TOKEN_STORAGE_DB", "/tmp/tokens.db"),
        # Vector sync settings (ADR-007)
@@ -328,18 +400,25 @@ def get_settings() -> Settings:
        ollama_base_url=os.getenv("OLLAMA_BASE_URL"),
        ollama_embedding_model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
        ollama_verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
+        # OpenAI settings
+        openai_api_key=os.getenv("OPENAI_API_KEY"),
+        openai_base_url=os.getenv("OPENAI_BASE_URL"),
+        openai_embedding_model=os.getenv(
+            "OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
+        ),
        # Document chunking settings
-        document_chunk_size=int(os.getenv("DOCUMENT_CHUNK_SIZE", "512")),
-        document_chunk_overlap=int(os.getenv("DOCUMENT_CHUNK_OVERLAP", "50")),
+        document_chunk_size=int(os.getenv("DOCUMENT_CHUNK_SIZE", "2048")),
+        document_chunk_overlap=int(os.getenv("DOCUMENT_CHUNK_OVERLAP", "200")),
        # Observability settings
        metrics_enabled=os.getenv("METRICS_ENABLED", "true").lower() == "true",
        metrics_port=int(os.getenv("METRICS_PORT", "9090")),
-        tracing_enabled=os.getenv("OTEL_ENABLED", "false").lower() == "true",
        otel_exporter_otlp_endpoint=os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT"),
+        otel_exporter_verify_ssl=os.getenv("OTEL_EXPORTER_VERIFY_SSL", "false").lower()
+        == "true",
        otel_service_name=os.getenv("OTEL_SERVICE_NAME", "nextcloud-mcp-server"),
        otel_traces_sampler=os.getenv("OTEL_TRACES_SAMPLER", "always_on"),
        otel_traces_sampler_arg=float(os.getenv("OTEL_TRACES_SAMPLER_ARG", "1.0")),
-        log_format=os.getenv("LOG_FORMAT", "json"),
+        log_format=os.getenv("LOG_FORMAT", "text"),
        log_level=os.getenv("LOG_LEVEL", "INFO"),
        log_include_trace_context=os.getenv("LOG_INCLUDE_TRACE_CONTEXT", "true").lower()
        == "true",
@@ -1,21 +1,37 @@
 """Helper functions for accessing context in MCP tools."""

+import logging
+
+from httpx import BasicAuth
 from mcp.server.fastmcp import Context

 from nextcloud_mcp_server.client import NextcloudClient
-from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.config import (
+    DeploymentMode,
+    get_deployment_mode,
+    get_settings,
+)
+
+logger = logging.getLogger(__name__)


 async def get_client(ctx: Context) -> NextcloudClient:
    """
    Get the appropriate Nextcloud client based on authentication mode.

-    ADR-005 compliant implementation supporting two modes:
-    1. BasicAuth mode: Returns shared client from lifespan context
-    2. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
-       Token already contains both MCP and Nextcloud audiences - use directly
-    3. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
-       Exchange MCP token for Nextcloud token via RFC 8693
+    ADR-016 compliant implementation supporting three deployment modes:
+
+    1. Smithery stateless mode (SMITHERY_DEPLOYMENT=true):
+       Create client from session configuration (nextcloud_url, username, app_password)
+       No persistent state - client created per-request from Smithery session config.
+
+    2. BasicAuth mode: Returns shared client from lifespan context
+
+    3. OAuth mode:
+       a. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
+          Token already contains both MCP and Nextcloud audiences - use directly
+       b. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
+          Exchange MCP token for Nextcloud token via RFC 8693

    SECURITY: Token passthrough has been REMOVED. All OAuth modes validate
    proper token audiences per MCP Security Best Practices specification.
@@ -24,7 +40,7 @@ async def get_client(ctx: Context) -> NextcloudClient:
    by the MCP server via @require_scopes decorator, not by the IdP.

    This function automatically detects the authentication mode by checking
-    the type of the lifespan context.
+    the deployment mode and type of the lifespan context.

    Args:
        ctx: MCP request context
@@ -34,6 +50,7 @@ async def get_client(ctx: Context) -> NextcloudClient:

    Raises:
        AttributeError: If context doesn't contain expected data
+        ValueError: If Smithery mode but session config is missing required fields

    Example:
        ```python
@@ -43,6 +60,12 @@ async def get_client(ctx: Context) -> NextcloudClient:
            return await client.capabilities()
        ```
    """
+    deployment_mode = get_deployment_mode()
+
+    # ADR-016: Smithery stateless mode - create client from session config
+    if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
+        return _get_client_from_session_config(ctx)
+
    settings = get_settings()
    lifespan_ctx = ctx.request_context.lifespan_context

@@ -75,3 +98,82 @@ async def get_client(ctx: Context) -> NextcloudClient:
        f"Lifespan context does not have 'client' or 'nextcloud_host' attribute. "
        f"Type: {type(lifespan_ctx)}"
    )
+
+
+def _get_client_from_session_config(ctx: Context) -> NextcloudClient:
+    """
+    Build a NextcloudClient from the Smithery session configuration.
+
+    ADR-016: the session config is read through get_smithery_session_config()
+    and a new client is constructed on every call; nothing is cached here.
+
+    Required session config fields:
+    - nextcloud_url: str - must start with http:// or https://
+    - username: str
+    - app_password: str
+
+    Args:
+        ctx: MCP request context (not read by this function)
+
+    Returns:
+        NextcloudClient using BasicAuth with the session credentials
+
+    Raises:
+        ValueError: If the session config is absent, a required field is
+            missing or empty, or nextcloud_url has no http(s) scheme
+    """
+    # Imported here rather than at module level, as in the original code.
+    from nextcloud_mcp_server.app import get_smithery_session_config
+
+    config = get_smithery_session_config()
+    if config is None:
+        raise ValueError(
+            "Session configuration required in Smithery mode. "
+            "Ensure nextcloud_url, username, and app_password are provided as URL query parameters."
+        )
+
+    # Read the required fields in a fixed order and collect the empty ones.
+    required = ("nextcloud_url", "username", "app_password")
+    values = {field: config.get(field) for field in required}
+    absent = [field for field in required if not values[field]]
+    if absent:
+        raise ValueError(
+            f"Missing required session config fields: {', '.join(absent)}. "
+            f"Configure these in the Smithery connection settings."
+        )
+
+    nextcloud_url = values["nextcloud_url"]
+    username = values["username"]
+    app_password = values["app_password"]
+
+    # Narrowing for the type checker; the check above already rules out None.
+    assert nextcloud_url is not None
+    assert username is not None
+    assert app_password is not None
+
+    has_http_scheme = nextcloud_url.startswith(("http://", "https://"))
+    if not has_http_scheme:
+        raise ValueError(
+            f"Invalid nextcloud_url: {nextcloud_url}. "
+            f"Must start with http:// or https://"
+        )
+
+    logger.debug(f"Creating Smithery client for {nextcloud_url} as {username}")
+
+    credentials = BasicAuth(username, app_password)
+    return NextcloudClient(
+        base_url=nextcloud_url,
+        username=username,
+        auth=credentials,
+    )
@@ -12,13 +12,24 @@ class NotesSearchController:
        """
        Search notes using token-based matching with relevance ranking.
        Returns notes sorted by relevance score.
+        If query is empty, returns all notes.
        """
        search_results = []
        query_tokens = self._process_query(query)

-        # If empty query after processing, return empty results
+        # If empty query after processing, return all notes
        if not query_tokens:
-            return []
+            async for note in notes:
+                search_results.append(
+                    {
+                        "id": note.get("id"),
+                        "title": note.get("title"),
+                        "category": note.get("category"),
+                        "modified": note.get("modified"),
+                        "_score": None,  # No score for unfiltered results
+                    }
+                )
+            return search_results

        # Process and score each note
        async for note in notes:
@@ -1,12 +1,18 @@
 """Document processing plugins for extracting text from various file formats."""

 from .base import DocumentProcessor, ProcessingResult, ProcessorError
+from .pymupdf import PyMuPDFProcessor
 from .registry import ProcessorRegistry, get_registry

+# Register processors at module initialization
+_registry = get_registry()
+_registry.register(PyMuPDFProcessor(), priority=10)
+
 __all__ = [
    "DocumentProcessor",
    "ProcessingResult",
    "ProcessorError",
    "ProcessorRegistry",
    "get_registry",
+    "PyMuPDFProcessor",
 ]
@@ -0,0 +1,253 @@
+"""Document processor using PyMuPDF (fitz) library."""
+
+import logging
+import pathlib
+import tempfile
+from collections.abc import Awaitable, Callable
+from typing import Any, Optional
+
+# NOTE: Do NOT call pymupdf.layout.activate() here!
+# It changes the behavior of pymupdf4llm.to_markdown() when page_chunks=True,
+# causing it to return a string instead of a list[dict].
+# See: https://github.com/pymupdf/pymupdf4llm/issues/323
+import pymupdf
+import pymupdf4llm
+
+from .base import DocumentProcessor, ProcessingResult, ProcessorError
+
+logger = logging.getLogger(__name__)
+
+
+class PyMuPDFProcessor(DocumentProcessor):
+    """Document processor using PyMuPDF library for PDF processing.
+
+    PyMuPDF (fitz) is a fast, local PDF processing library that extracts text,
+    metadata, and images without requiring external API calls.
+
+    Features:
+    - Fast text extraction with layout preservation
+    - PDF metadata extraction (title, author, creation date, page count)
+    - Image extraction for future multimodal support
+    - Page number tracking for precise citations
+    """
+
+    SUPPORTED_TYPES = {
+        "application/pdf",
+    }
+
+    # (key in PyMuPDF's ``Document.metadata``, key used in the result metadata).
+    # Order matters: it is the insertion order of the resulting dictionary.
+    _METADATA_FIELDS = (
+        ("title", "title"),
+        ("author", "author"),
+        ("subject", "subject"),
+        ("keywords", "keywords"),
+        ("creator", "creator"),
+        ("producer", "producer"),
+        ("creationDate", "creation_date"),
+        ("modDate", "modification_date"),
+    )
+
+    def __init__(
+        self,
+        extract_images: bool = True,
+        image_dir: Optional[str | pathlib.Path] = None,
+    ):
+        """Initialize PyMuPDF processor.
+
+        Args:
+            extract_images: Whether to extract embedded images from PDFs
+            image_dir: Directory to store extracted images (defaults to temp directory)
+        """
+        self.extract_images = extract_images
+
+        if image_dir is None:
+            self.image_dir = pathlib.Path(tempfile.gettempdir()) / "pdf-images"
+        else:
+            self.image_dir = pathlib.Path(image_dir)
+
+        # Create image directory if it doesn't exist
+        if self.extract_images:
+            self.image_dir.mkdir(exist_ok=True, parents=True)
+            logger.info(
+                f"Initialized PyMuPDFProcessor with image extraction to {self.image_dir}"
+            )
+        else:
+            logger.info("Initialized PyMuPDFProcessor without image extraction")
+
+    @property
+    def name(self) -> str:
+        """Identifier reported in ``ProcessingResult.processor``."""
+        return "pymupdf"
+
+    @property
+    def supported_mime_types(self) -> set[str]:
+        """MIME types this processor accepts (PDF only)."""
+        return self.SUPPORTED_TYPES
+
+    @staticmethod
+    def _image_dir_name(filename: Optional[str]) -> str:
+        """Derive the per-document image sub-directory name from a filename.
+
+        Path separators are flattened to underscores. The names "." and ".."
+        are rejected because joining them onto ``image_dir`` would point at the
+        shared image root or its parent instead of a dedicated sub-directory.
+        """
+        pdf_id = filename.replace("/", "_") if filename else "unknown"
+        return "unknown" if pdf_id in {".", ".."} else pdf_id
+
+    @staticmethod
+    def _build_page_boundaries(
+        page_chunks: list[dict[str, Any]],
+    ) -> tuple[list[str], list[dict[str, Any]]]:
+        """Split page chunks into page texts and their character offsets.
+
+        Offsets are relative to the concatenation of all page texts, so
+        ``"".join(texts)[start_offset:end_offset]`` is the text of that page.
+        """
+        page_texts: list[str] = []
+        page_boundaries: list[dict[str, Any]] = []
+        current_offset = 0
+        for chunk in page_chunks:
+            text = chunk.get("text", "")
+            # Fall back to the 1-based position when the chunk carries no page
+            page_num = chunk.get("metadata", {}).get("page", len(page_texts) + 1)
+            page_texts.append(text)
+            page_boundaries.append(
+                {
+                    "page": page_num,
+                    "start_offset": current_offset,
+                    "end_offset": current_offset + len(text),
+                }
+            )
+            current_offset += len(text)
+        return page_texts, page_boundaries
+
+    async def process(
+        self,
+        content: bytes,
+        content_type: str,
+        filename: Optional[str] = None,
+        options: Optional[dict[str, Any]] = None,
+        progress_callback: Optional[
+            Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
+        ] = None,
+    ) -> ProcessingResult:
+        """Process a PDF document and extract text, metadata, and images.
+
+        Args:
+            content: PDF document bytes
+            content_type: MIME type (should be application/pdf)
+            filename: Optional filename for better error messages
+            options: Processing options (currently unused)
+            progress_callback: Optional callback for progress updates
+
+        Returns:
+            ProcessingResult with extracted text and metadata
+
+        Raises:
+            ProcessorError: If PDF processing fails
+        """
+        import anyio
+
+        # Kept outside the try block so the finally clause can always see it
+        doc = None
+        try:
+            if progress_callback:
+                await progress_callback(0, 100, "Opening PDF document")
+
+            # Open the document in a worker thread
+            doc = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+                lambda: pymupdf.open("pdf", content)
+            )
+
+            metadata = self._extract_metadata(doc, filename)
+            metadata["file_size"] = len(content)
+            page_count = doc.page_count
+
+            if progress_callback:
+                await progress_callback(10, 100, f"Extracting {page_count} pages")
+
+            # Prepare image directory if needed
+            pdf_image_dir = None
+            if self.extract_images:
+                pdf_image_dir = self.image_dir / self._image_dir_name(filename)
+                pdf_image_dir.mkdir(exist_ok=True, parents=True)
+
+            # Extract all pages in a single call with page_chunks=True
+            def do_extract() -> list[dict[str, Any]]:
+                # When page_chunks=True, to_markdown returns list[dict] not str
+                return pymupdf4llm.to_markdown(  # type: ignore[return-value]
+                    doc,
+                    write_images=self.extract_images,
+                    image_path=pdf_image_dir if self.extract_images else None,
+                    page_chunks=True,
+                )
+
+            page_chunks: list[dict[str, Any]] = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+                do_extract
+            )
+
+            if progress_callback:
+                await progress_callback(90, 100, "Building result")
+
+            page_texts, page_boundaries = self._build_page_boundaries(page_chunks)
+
+            # Collect image paths
+            image_paths = []
+            if pdf_image_dir and pdf_image_dir.exists():
+                image_paths = [str(p) for p in pdf_image_dir.glob("*")]
+
+            # Build final text and metadata
+            md_text = "".join(page_texts)
+            metadata["has_images"] = len(image_paths) > 0
+            if image_paths:
+                metadata["image_count"] = len(image_paths)
+                metadata["image_paths"] = image_paths
+            metadata["page_boundaries"] = page_boundaries
+
+            if progress_callback:
+                await progress_callback(100, 100, "Processing complete")
+
+            logger.info(
+                f"Successfully processed PDF {filename or '<bytes>'}: "
+                f"{metadata['page_count']} pages, {len(md_text)} chars, "
+                f"{metadata.get('image_count', 0)} images"
+            )
+
+            return ProcessingResult(
+                text=md_text,
+                metadata=metadata,
+                processor=self.name,
+                success=True,
+            )
+
+        except Exception as e:
+            error_msg = f"Failed to process PDF {filename or '<bytes>'}: {e}"
+            logger.error(error_msg, exc_info=True)
+            raise ProcessorError(error_msg) from e
+
+        finally:
+            # Release the document on every path; previously a failure during
+            # extraction left the handle open.
+            if doc is not None:
+                try:
+                    doc.close()
+                except Exception as close_error:
+                    # Never let cleanup mask the result or the original error
+                    logger.warning(f"Failed to close PDF document: {close_error}")
+
+    def _extract_metadata(
+        self, doc: pymupdf.Document, filename: Optional[str]
+    ) -> dict[str, Any]:
+        """Extract metadata from PDF document.
+
+        Args:
+            doc: Opened PyMuPDF document
+            filename: Optional filename
+
+        Returns:
+            Dictionary with PDF metadata
+        """
+        metadata: dict[str, Any] = {}
+        # doc.metadata may be None/empty; normalise so lookups below are safe
+        pdf_metadata = doc.metadata or {}
+
+        # Basic document info
+        metadata["page_count"] = doc.page_count
+
+        # Prefer the format string PyMuPDF reports (e.g. "PDF 1.7"). The old
+        # pdf_version() probe is kept only as a fallback and yields "PDF 1.?"
+        # when the attribute is missing.
+        pdf_format = pdf_metadata.get("format")
+        if pdf_format:
+            metadata["format"] = pdf_format
+        else:
+            metadata["format"] = "PDF 1." + str(
+                doc.pdf_version() if hasattr(doc, "pdf_version") else "?"  # type: ignore[call-non-callable]
+            )
+
+        if filename:
+            metadata["filename"] = filename
+
+        # Copy the standard PDF metadata fields, skipping empty values
+        for source_key, target_key in self._METADATA_FIELDS:
+            value = pdf_metadata.get(source_key)
+            if value:
+                metadata[target_key] = value
+
+        return metadata
+
+    async def health_check(self) -> bool:
+        """Check if PyMuPDF is available and working.
+
+        Returns:
+            True if processor is ready to use
+        """
+        try:
+            # Try to create a simple PDF in memory
+            test_doc = pymupdf.open()
+            test_doc.close()
+            return True
+        except Exception as e:
+            logger.error(f"PyMuPDF health check failed: {e}")
+            return False
@@ -1,6 +1,13 @@
 """Embedding service package for generating vector embeddings."""

-from .service import EmbeddingService, get_embedding_service
+from .bm25_provider import BM25SparseEmbeddingProvider
+from .service import EmbeddingService, get_bm25_service, get_embedding_service
 from .simple_provider import SimpleEmbeddingProvider

-__all__ = ["EmbeddingService", "get_embedding_service", "SimpleEmbeddingProvider"]
+__all__ = [
+    "EmbeddingService",
+    "get_embedding_service",
+    "BM25SparseEmbeddingProvider",
+    "get_bm25_service",
+    "SimpleEmbeddingProvider",
+]
@@ -0,0 +1,98 @@
+"""BM25 sparse embedding provider using FastEmbed."""
+
+import logging
+from typing import Any
+
+from fastembed import SparseTextEmbedding
+
+logger = logging.getLogger(__name__)
+
+
+class BM25SparseEmbeddingProvider:
+    """
+    Sparse (BM25) embedding provider backed by FastEmbed.
+
+    Produces keyword-oriented sparse vectors intended to be combined with
+    dense semantic vectors in Qdrant via Reciprocal Rank Fusion (RRF).
+
+    A sparse embedding has no fixed dimension: it is a variable-length set of
+    (index, value) pairs over the BM25 vocabulary.
+    """
+
+    def __init__(self, model_name: str = "Qdrant/bm25"):
+        """
+        Load the FastEmbed sparse model.
+
+        Args:
+            model_name: FastEmbed BM25 model name (default: Qdrant/bm25)
+        """
+        logger.info(f"Initializing BM25 sparse embedding provider: {model_name}")
+        self.model_name = model_name
+
+        # Loading the FastEmbed model happens eagerly, at construction time
+        self.model = SparseTextEmbedding(model_name=model_name)
+        logger.info(f"BM25 sparse embedding model loaded: {model_name}")
+
+    @staticmethod
+    def _to_qdrant_sparse(embedding: Any) -> dict[str, Any]:
+        """Convert one FastEmbed sparse embedding into plain Python lists."""
+        return {
+            "indices": embedding.indices.tolist(),
+            "values": embedding.values.tolist(),
+        }
+
+    def encode(self, text: str) -> dict[str, Any]:
+        """
+        Encode one text into a BM25 sparse vector (synchronous).
+
+        Note: In async code use encode_async() so the event loop is not blocked.
+
+        Args:
+            text: Input text to encode
+
+        Returns:
+            Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
+        """
+        # embed() yields results lazily; a single input yields a single result
+        results = self.model.embed([text])
+        first = next(iter(results))
+        return self._to_qdrant_sparse(first)
+
+    async def encode_async(self, text: str) -> dict[str, Any]:
+        """
+        Encode one text into a BM25 sparse vector (async).
+
+        The CPU-bound encoding is pushed to a worker thread.
+
+        Args:
+            text: Input text to encode
+
+        Returns:
+            Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
+        """
+        import anyio
+
+        def _encode_in_thread() -> dict[str, Any]:
+            return self.encode(text)
+
+        return await anyio.to_thread.run_sync(_encode_in_thread)  # type: ignore[attr-defined]
+
+    async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
+        """
+        Encode several texts into BM25 sparse vectors in one model call.
+
+        Args:
+            texts: List of texts to encode
+
+        Returns:
+            List of dictionaries with 'indices' and 'values' for each text
+        """
+        import anyio
+
+        def _embed_all() -> list[Any]:
+            # Materialise the generator inside the worker thread
+            return list(self.model.embed(texts))
+
+        raw_embeddings = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+            _embed_all
+        )
+
+        converted: list[dict[str, Any]] = []
+        for raw in raw_embeddings:
+            converted.append(self._to_qdrant_sparse(raw))
+        return converted
@@ -17,6 +17,7 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
        base_url: str,
        model: str = "nomic-embed-text",
        verify_ssl: bool = True,
+        timeout=httpx.Timeout(timeout=120, connect=5),
    ):
        """
        Initialize Ollama embedding provider.
@@ -29,12 +30,14 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.verify_ssl = verify_ssl
-        self.client = httpx.AsyncClient(verify=verify_ssl, timeout=30.0)
-        self._dimension = 768  # nomic-embed-text default
+        self.client = httpx.AsyncClient(verify=verify_ssl, timeout=timeout)
+        self._dimension: int | None = None  # Will be detected dynamically
        logger.info(
            f"Initialized Ollama provider: {base_url} (model={model}, verify_ssl={verify_ssl})"
        )

+        self._check_model_is_loaded(autoload=True)
+
    async def embed(self, text: str) -> list[float]:
        """
        Generate embedding vector for text.
@@ -71,15 +74,55 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
            embeddings.append(embedding)
        return embeddings

+    async def _detect_dimension(self):
+        """
+        Determine the embedding dimension by embedding a probe string.
+
+        The result is cached in ``self._dimension``; once it is set, later
+        calls return immediately without contacting the model.
+        """
+        if self._dimension is not None:
+            return
+
+        logger.debug(f"Detecting embedding dimension for model {self.model}...")
+        probe_vector = await self.embed("test")
+        self._dimension = len(probe_vector)
+        logger.info(
+            f"Detected embedding dimension: {self._dimension} for model {self.model}"
+        )
+
     def get_dimension(self) -> int:
         """
         Get embedding dimension.
 
         Returns:
-            Vector dimension (768 for nomic-embed-text)
+            Vector dimension for the configured model
+
+        Raises:
+            RuntimeError: If dimension not detected yet (call _detect_dimension first)
         """
+        # _dimension starts as None and is filled in by _detect_dimension(),
+        # so this synchronous getter can only report an already-cached value.
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.model}. "
+                "Call _detect_dimension() first or generate an embedding."
+            )
         return self._dimension

+    def _check_model_is_loaded(self, autoload: bool = True):
+        """
+        Check that the configured model is available in Ollama.
+
+        Lists the models known to the server and, when the configured model is
+        missing and ``autoload`` is true, asks the server to pull it. Both
+        requests are synchronous.
+
+        Args:
+            autoload: Pull the model when it is not available yet
+
+        Raises:
+            httpx.HTTPStatusError: If listing or pulling returns an error status
+        """
+        # The module-level httpx helpers do not inherit settings from
+        # self.client, so TLS verification and timeouts are passed explicitly.
+        response = httpx.get(
+            f"{self.base_url}/api/tags",
+            verify=self.verify_ssl,
+            timeout=self.client.timeout,
+        )
+        response.raise_for_status()
+
+        models = [model["name"] for model in response.json().get("models", [])]
+        logger.info("Ollama has following models pre-loaded: %s", models)
+
+        # Ollama lists fully tagged names ("nomic-embed-text:latest"), while
+        # the configured name may omit the tag. Only the last path segment is
+        # inspected so a registry host with a port is not mistaken for a tag.
+        has_tag = ":" in self.model.rsplit("/", 1)[-1]
+        tagged_name = self.model if has_tag else f"{self.model}:latest"
+        is_available = self.model in models or tagged_name in models
+
+        if not is_available and autoload:
+            logger.warning(
+                "Embedding model '%s' not yet available in ollama, attempting to pull now...",
+                self.model,
+            )
+            # stream=False makes Ollama answer with one final JSON object, so a
+            # failed pull surfaces as an HTTP error for raise_for_status().
+            # Downloads can take minutes: keep the connect timeout but do not
+            # limit how long the server may take to respond.
+            response = httpx.post(
+                f"{self.base_url}/api/pull",
+                json={"model": self.model, "stream": False},
+                verify=self.verify_ssl,
+                timeout=httpx.Timeout(None, connect=self.client.timeout.connect),
+            )
+            response.raise_for_status()
+
    async def close(self):
        """Close HTTP client."""
        await self.client.aclose()
@@ -1,56 +1,30 @@
-"""Embedding service with provider detection."""
+"""Embedding service with provider detection.
+
+DEPRECATED: This module is maintained for backward compatibility.
+New code should use nextcloud_mcp_server.providers.get_provider() directly.
+"""

 import logging
-import os

-from .base import EmbeddingProvider
-from .ollama_provider import OllamaEmbeddingProvider
-from .simple_provider import SimpleEmbeddingProvider
+from nextcloud_mcp_server.providers import get_provider
+
+from .bm25_provider import BM25SparseEmbeddingProvider

 logger = logging.getLogger(__name__)


 class EmbeddingService:
-    """Unified embedding service with automatic provider detection."""
+    """
+    Unified embedding service with automatic provider detection.
+
+    DEPRECATED: This class wraps the new unified provider infrastructure
+    for backward compatibility. New code should use
+    nextcloud_mcp_server.providers.get_provider() directly.
+    """

    def __init__(self):
        """Initialize embedding service with auto-detected provider."""
-        self.provider = self._detect_provider()
-
-    def _detect_provider(self) -> EmbeddingProvider:
-        """
-        Auto-detect available embedding provider.
-
-        Checks environment variables in order:
-        1. OLLAMA_BASE_URL - Use Ollama provider (production)
-        2. OPENAI_API_KEY - Use OpenAI provider (future)
-        3. Fallback to SimpleEmbeddingProvider (testing/development)
-
-        Returns:
-            Configured embedding provider
-        """
-        # Ollama provider (production)
-        ollama_url = os.getenv("OLLAMA_BASE_URL")
-        if ollama_url:
-            logger.info(f"Using Ollama embedding provider: {ollama_url}")
-            return OllamaEmbeddingProvider(
-                base_url=ollama_url,
-                model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
-                verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
-            )
-
-        # OpenAI provider (future implementation)
-        # openai_key = os.getenv("OPENAI_API_KEY")
-        # if openai_key:
-        #     return OpenAIEmbeddingProvider(api_key=openai_key)
-
-        # Fallback to simple provider for development/testing
-        logger.warning(
-            "No embedding provider configured (OLLAMA_BASE_URL or OPENAI_API_KEY not set). "
-            "Using SimpleEmbeddingProvider for testing/development. "
-            "For production, configure an external embedding service."
-        )
-        return SimpleEmbeddingProvider(dimension=384)
+        self.provider = get_provider()

    async def embed(self, text: str) -> list[float]:
        """
@@ -109,3 +83,20 @@ def get_embedding_service() -> EmbeddingService:
    if _embedding_service is None:
        _embedding_service = EmbeddingService()
    return _embedding_service
+
+
+# BM25 sparse embedding singleton
+_bm25_service: BM25SparseEmbeddingProvider | None = None
+
+
+def get_bm25_service() -> BM25SparseEmbeddingProvider:
+    """
+    Return the shared BM25 sparse embedding provider, creating it on first use.
+
+    Returns:
+        Global BM25SparseEmbeddingProvider instance
+    """
+    global _bm25_service
+    if _bm25_service is not None:
+        return _bm25_service
+
+    # First call: build the provider and cache it at module level
+    _bm25_service = BM25SparseEmbeddingProvider()
+    return _bm25_service
@@ -0,0 +1,170 @@
+"""Pydantic models for Nextcloud News app responses."""
+
+from typing import List
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from .base import BaseResponse
+
+
+class NewsFolder(BaseModel):
+    """A News folder, identified by ID and name."""
+
+    # Accept both the field name and its alias when populating the model
+    model_config = ConfigDict(populate_by_name=True)
+
+    id: int = Field(..., description="Folder ID")
+    name: str = Field(..., description="Folder name")
+
+
+class NewsFeed(BaseModel):
+    """A News feed, i.e. one RSS/Atom subscription.
+
+    Snake-case fields carry camelCase aliases so the model can be populated
+    from either spelling.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    # Required fields
+    id: int = Field(..., description="Feed ID")
+    url: str = Field(..., description="Feed URL")
+    title: str = Field(..., description="Feed title")
+    favicon_link: str | None = Field(
+        default=None, alias="faviconLink", description="Favicon URL"
+    )
+    link: str | None = Field(default=None, description="Website link")
+    added: int = Field(..., description="Unix timestamp when feed was added")
+    folder_id: int | None = Field(
+        default=None, alias="folderId", description="Parent folder ID"
+    )
+    unread_count: int = Field(
+        default=0, alias="unreadCount", description="Number of unread items"
+    )
+    ordering: int = Field(
+        default=0, description="Feed ordering (0=default, 1=oldest, 2=newest)"
+    )
+    pinned: bool = Field(default=False, description="Whether feed is pinned to top")
+    # Update-failure bookkeeping
+    update_error_count: int = Field(
+        default=0, alias="updateErrorCount", description="Consecutive update failures"
+    )
+    last_update_error: str | None = Field(
+        default=None, alias="lastUpdateError", description="Last update error message"
+    )
+
+    @property
+    def has_errors(self) -> bool:
+        """True when at least one update failure is recorded."""
+        return 0 < self.update_error_count
+
+
+class NewsItem(BaseModel):
+    """A News item (article) including its full body."""
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    # Identity
+    id: int = Field(..., description="Item ID")
+    guid: str = Field(..., description="Globally unique identifier")
+    guid_hash: str = Field(..., alias="guidHash", description="MD5 hash of GUID")
+    # Article content
+    url: str | None = Field(default=None, description="Article URL")
+    title: str = Field(..., description="Article title")
+    author: str | None = Field(default=None, description="Article author")
+    pub_date: int | None = Field(
+        default=None, alias="pubDate", description="Publication timestamp"
+    )
+    body: str | None = Field(default=None, description="Article content (HTML)")
+    # Attached media
+    enclosure_mime: str | None = Field(
+        default=None, alias="enclosureMime", description="Enclosure MIME type"
+    )
+    enclosure_link: str | None = Field(
+        default=None, alias="enclosureLink", description="Enclosure URL"
+    )
+    media_thumbnail: str | None = Field(
+        default=None, alias="mediaThumbnail", description="Media thumbnail URL"
+    )
+    media_description: str | None = Field(
+        default=None, alias="mediaDescription", description="Media description"
+    )
+    # State and bookkeeping
+    feed_id: int = Field(..., alias="feedId", description="Parent feed ID")
+    unread: bool = Field(default=True, description="Whether item is unread")
+    starred: bool = Field(default=False, description="Whether item is starred")
+    rtl: bool = Field(default=False, description="Right-to-left text")
+    last_modified: int = Field(
+        ..., alias="lastModified", description="Last modification timestamp"
+    )
+    fingerprint: str | None = Field(
+        default=None, description="Content fingerprint for deduplication"
+    )
+    content_hash: str | None = Field(
+        default=None, alias="contentHash", description="Content hash"
+    )
+
+
+class NewsItemSummary(BaseModel):
+    """Reduced view of a News item for list responses (no body or media)."""
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    id: int = Field(..., description="Item ID")
+    title: str = Field(..., description="Article title")
+    feed_id: int = Field(..., alias="feedId", description="Parent feed ID")
+    unread: bool = Field(default=True, description="Whether item is unread")
+    starred: bool = Field(default=False, description="Whether item is starred")
+    pub_date: int | None = Field(
+        default=None, alias="pubDate", description="Publication timestamp"
+    )
+    url: str | None = Field(default=None, description="Article URL")
+    author: str | None = Field(default=None, description="Article author")
+
+
+class NewsStatus(BaseModel):
+    """Status of the News app: its version plus any configuration warnings."""
+
+    version: str = Field(..., description="News app version")
+    # default_factory gives every instance its own empty dict
+    warnings: dict = Field(description="Configuration warnings", default_factory=dict)
+
+
+# --- Response Models ---
+
+
+class ListFoldersResponse(BaseResponse):
+    """Response carrying the folders and how many there are."""
+
+    results: List[NewsFolder] = Field(..., description="List of folders")
+    total_count: int = Field(..., description="Total number of folders")
+
+
+class ListFeedsResponse(BaseResponse):
+    """Response carrying the feeds plus starred/newest-item counters."""
+
+    results: List[NewsFeed] = Field(..., description="List of feeds")
+    starred_count: int = Field(default=0, description="Number of starred items")
+    newest_item_id: int | None = Field(default=None, description="ID of newest item")
+    total_count: int = Field(..., description="Total number of feeds")
+
+
+class ListItemsResponse(BaseResponse):
+    """Response carrying item summaries together with pagination hints."""
+
+    results: List[NewsItemSummary] = Field(..., description="List of items")
+    total_count: int = Field(..., description="Number of items returned")
+    has_more: bool = Field(default=False, description="Whether more items exist")
+    oldest_id: int | None = Field(
+        default=None, description="Oldest item ID (for pagination)"
+    )
+
+
+class GetItemResponse(BaseResponse):
+    """Response wrapping one full News item."""
+
+    item: NewsItem = Field(..., description="Full item details")
+
+
+class FeedHealthResponse(BaseResponse):
+    """Response describing the update-error state of one feed."""
+
+    feed_id: int = Field(..., description="Feed ID")
+    title: str = Field(..., description="Feed title")
+    url: str = Field(..., description="Feed URL")
+    has_errors: bool = Field(..., description="Whether feed has update errors")
+    error_count: int = Field(..., description="Number of consecutive errors")
+    last_error: str | None = Field(default=None, description="Last error message")
+
+
+class GetStatusResponse(BaseResponse):
+    """Response carrying the News app version and configuration warnings."""
+
+    version: str = Field(..., description="News app version")
+    # default_factory gives every instance its own empty dict
+    warnings: dict = Field(description="Configuration warnings", default_factory=dict)
@@ -10,7 +10,7 @@ from .base import BaseResponse
 class SemanticSearchResult(BaseModel):
    """Model for semantic search results with additional metadata."""

-    id: int = Field(description="Document ID")
+    id: int = Field(description="Document ID (int for all document types)")
    doc_type: str = Field(
        description="Document type (note, calendar_event, deck_card, etc.)"
    )
@@ -19,9 +19,45 @@ class SemanticSearchResult(BaseModel):
        default="", description="Document category (notes) or location (calendar)"
    )
    excerpt: str = Field(description="Excerpt from matching chunk")
-    score: float = Field(description="Semantic similarity score (0-1)")
+    score: float = Field(
+        description=(
+            "Relevance score (≥ 0.0, higher is better). "
+            "Score range depends on fusion method: "
+            "RRF produces scores in [0.0, 1.0], "
+            "DBSF can exceed 1.0 (sum of normalized scores from multiple systems)"
+        )
+    )
    chunk_index: int = Field(description="Index of matching chunk in document")
    total_chunks: int = Field(description="Total number of chunks in document")
+    chunk_start_offset: Optional[int] = Field(
+        default=None, description="Character position where chunk starts in document"
+    )
+    chunk_end_offset: Optional[int] = Field(
+        default=None, description="Character position where chunk ends in document"
+    )
+    page_number: Optional[int] = Field(
+        default=None, description="Page number for PDF documents"
+    )
+    # Context expansion fields (optional, populated when include_context=True)
+    has_context_expansion: bool = Field(
+        default=False, description="Whether context expansion was performed"
+    )
+    marked_text: Optional[str] = Field(
+        default=None,
+        description="Full text with position markers around matched chunk",
+    )
+    before_context: Optional[str] = Field(
+        default=None, description="Text before the matched chunk"
+    )
+    after_context: Optional[str] = Field(
+        default=None, description="Text after the matched chunk"
+    )
+    has_before_truncation: Optional[bool] = Field(
+        default=None, description="Whether before_context was truncated"
+    )
+    has_after_truncation: Optional[bool] = Field(
+        default=None, description="Whether after_context was truncated"
+    )


 class SemanticSearchResponse(BaseResponse):
@@ -12,7 +12,7 @@ import logging
 import sys
 from typing import Any

-from pythonjsonlogger import jsonlogger
+from pythonjsonlogger.json import JsonFormatter

 from nextcloud_mcp_server.observability.tracing import get_trace_context

@@ -37,13 +37,20 @@ class HealthCheckFilter(logging.Filter):
        """
        # Check if the log message contains health check endpoints
        message = record.getMessage()
-        return not any(
+        health_check = any(
            endpoint in message
-            for endpoint in ["/health/live", "/health/ready", "/metrics"]
+            for endpoint in [
+                "/health/live",
+                "/health/ready",
+                "/metrics",
+                "/app/vector-sync/status",
+            ]
        )

+        return not health_check

-class TraceContextFormatter(jsonlogger.JsonFormatter):
+
+class TraceContextFormatter(JsonFormatter):
    """
    JSON formatter that injects OpenTelemetry trace context into log records.

@@ -53,7 +60,7 @@ class TraceContextFormatter(jsonlogger.JsonFormatter):

    def add_fields(
        self,
-        log_record: dict[str, Any],
+        log_data: dict[str, Any],
        record: logging.LogRecord,
        message_dict: dict[str, Any],
    ) -> None:
@@ -61,28 +68,28 @@ class TraceContextFormatter(jsonlogger.JsonFormatter):
        Add custom fields to the log record, including trace context.

        Args:
-            log_record: Dictionary to be serialized as JSON
+            log_data: Dictionary to be serialized as JSON
            record: LogRecord instance
            message_dict: Dictionary of extra fields from log call
        """
        # Call parent to add standard fields
-        super().add_fields(log_record, record, message_dict)
+        super().add_fields(log_data, record, message_dict)

        # Add trace context if available
        trace_context = get_trace_context()
        if trace_context:
-            log_record["trace_id"] = trace_context.get("trace_id")
-            log_record["span_id"] = trace_context.get("span_id")
+            log_data["trace_id"] = trace_context.get("trace_id")
+            log_data["span_id"] = trace_context.get("span_id")

        # Add standard fields with consistent naming
-        log_record["timestamp"] = self.formatTime(record)
-        log_record["level"] = record.levelname
-        log_record["logger"] = record.name
-        log_record["message"] = record.getMessage()
+        log_data["timestamp"] = self.formatTime(record)
+        log_data["level"] = record.levelname
+        log_data["logger"] = record.name
+        log_data["message"] = record.getMessage()

        # Include exception info if present
        if record.exc_info:
-            log_record["exception"] = self.formatException(record.exc_info)
+            log_data["exception"] = self.formatException(record.exc_info)


 class TraceContextTextFormatter(logging.Formatter):
@@ -147,7 +154,7 @@ def setup_logging(
                datefmt="%Y-%m-%dT%H:%M:%S",
            )
        else:
-            formatter = jsonlogger.JsonFormatter(
+            formatter = JsonFormatter(
                "%(timestamp)s %(level)s %(name)s %(message)s",
                datefmt="%Y-%m-%dT%H:%M:%S",
            )
@@ -251,7 +258,7 @@ def get_uvicorn_logging_config(
        if include_trace_context:
            formatter_class = "nextcloud_mcp_server.observability.logging_config.TraceContextFormatter"
        else:
-            formatter_class = "pythonjsonlogger.jsonlogger.JsonFormatter"
+            formatter_class = "pythonjsonlogger.json.JsonFormatter"
        format_string = "%(timestamp)s %(level)s %(name)s %(message)s"
    else:
        if include_trace_context:
@@ -352,3 +352,115 @@ def record_dependency_check(dependency: str, duration: float) -> None:
        duration: Check duration in seconds
    """
    dependency_check_duration_seconds.labels(dependency=dependency).observe(duration)
+
+
+def record_vector_sync_scan(documents_found: int) -> None:
+    """
+    Record documents scanned during vector sync.
+
+    Args:
+        documents_found: Number of documents discovered in scan
+    """
+    vector_sync_documents_scanned_total.inc(documents_found)
+
+
+def record_vector_sync_processing(duration: float, status: str = "success") -> None:
+    """
+    Record document processing with duration and status.
+
+    Args:
+        duration: Processing duration in seconds
+        status: "success" or "error"
+    """
+    vector_sync_documents_processed_total.labels(status=status).inc()
+    vector_sync_processing_duration_seconds.observe(duration)
+
+
+def record_qdrant_operation(operation: str, status: str = "success") -> None:
+    """
+    Record Qdrant vector database operation.
+
+    Args:
+        operation: Operation type ("upsert", "search", "delete")
+        status: "success" or "error"
+    """
+    qdrant_operations_total.labels(operation=operation, status=status).inc()
+
+
+def update_vector_sync_queue_size(size: int) -> None:
+    """
+    Update vector sync queue size gauge.
+
+    Args:
+        size: Current queue size
+    """
+    vector_sync_queue_size.set(size)
+
+
+# =============================================================================
+# Decorator for Automatic Tool Instrumentation
+# =============================================================================
+
+
+def instrument_tool(func):
+    """
+    Decorator to automatically instrument MCP tool functions with metrics and tracing.
+
+    Wraps async tool functions to record execution time, success/error status, and
+    create OpenTelemetry trace spans. Compatible with @mcp.tool() and @require_scopes()
+    decorators.
+
+    Usage:
+        @mcp.tool()
+        @require_scopes("notes:write")
+        @instrument_tool
+        async def nc_notes_create_note(...):
+            ...
+
+    Args:
+        func: The async function to instrument
+
+    Returns:
+        Wrapped function with metrics and tracing instrumentation
+    """
+    import functools
+    import time
+
+    from nextcloud_mcp_server.observability.tracing import trace_operation
+
+    @functools.wraps(func)
+    async def wrapper(*args, **kwargs):
+        tool_name = func.__name__
+        start_time = time.time()
+
+        # Extract tool arguments for tracing (sanitize sensitive fields)
+        # kwargs contains the actual arguments passed to the tool
+        tool_args = {
+            k: v
+            for k, v in kwargs.items()
+            if k not in ("password", "token", "secret", "api_key", "etag", "ctx")
+        }
+
+        # Create trace span with metrics collection
+        with trace_operation(
+            f"mcp.tool.{tool_name}",
+            attributes={
+                "mcp.tool.name": tool_name,
+                "mcp.tool.args": str(tool_args)[:500]
+                if tool_args
+                else None,  # Limit to 500 chars
+            },
+            record_exception=True,
+        ):
+            try:
+                result = await func(*args, **kwargs)
+                duration = time.time() - start_time
+                record_tool_call(tool_name, duration, "success")
+                return result
+            except Exception as e:
+                duration = time.time() - start_time
+                record_tool_call(tool_name, duration, "error")
+                record_tool_error(tool_name, type(e).__name__)
+                raise
+
+    return wrapper
@@ -66,22 +66,44 @@ class ObservabilityMiddleware(BaseHTTPMiddleware):
        # Record start time
        start_time = time.time()

-        try:
-            # Create span for request (OpenTelemetry auto-instrumentation will create parent span)
-            with trace_operation(
-                f"HTTP {method} {endpoint}",
-                attributes={
-                    "http.method": method,
-                    "http.path": path,
-                    "http.scheme": request.url.scheme,
-                    "http.host": request.url.hostname,
-                },
-            ):
-                # Process request
-                response = await call_next(request)
+        # Skip tracing for health/metrics/polling endpoints to reduce noise
+        should_trace = not (
+            path.startswith("/health/")
+            or path == "/metrics"
+            or path == "/app/vector-sync/status"
+        )

-                # Add response status to span
-                add_span_attribute("http.status_code", response.status_code)
+        try:
+            if should_trace:
+                # Create span for request (OpenTelemetry auto-instrumentation will create parent span)
+                with trace_operation(
+                    f"HTTP {method} {endpoint}",
+                    attributes={
+                        "http.method": method,
+                        "http.path": path,
+                        "http.scheme": request.url.scheme,
+                        "http.host": request.url.hostname,
+                    },
+                ):
+                    # Process request
+                    response = await call_next(request)
+
+                    # Add response status to span
+                    add_span_attribute("http.status_code", response.status_code)
+
+                    # Record metrics
+                    duration = time.time() - start_time
+                    self._record_request_metrics(
+                        method=method,
+                        endpoint=endpoint,
+                        status_code=response.status_code,
+                        duration=duration,
+                    )
+
+                    return response
+            else:
+                # No tracing for health/metrics endpoints, but still record metrics
+                response = await call_next(request)

                # Record metrics
                duration = time.time() - start_time
@@ -13,9 +13,9 @@ import logging
 from contextlib import contextmanager
 from typing import Any

+from importlib_metadata import version
 from opentelemetry import trace
 from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
-from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
 from opentelemetry.instrumentation.logging import LoggingInstrumentor
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
@@ -27,10 +27,13 @@ logger = logging.getLogger(__name__)
 # Global tracer instance (initialized in setup_tracing)
 _tracer: Tracer | None = None

+# NOTE: httpx auto-instrumentation (HTTPXClientInstrumentor) is no longer set up in this module.
+

 def setup_tracing(
    service_name: str = "nextcloud-mcp-server",
    otlp_endpoint: str | None = None,
+    otlp_verify_ssl: bool = False,
    sampling_rate: float = 1.0,
 ) -> Tracer:
    """
@@ -40,6 +43,8 @@ def setup_tracing(
        service_name: Service name for traces (default: "nextcloud-mcp-server")
        otlp_endpoint: OTLP gRPC endpoint (e.g., "http://otel-collector:4317")
                      If None, tracing is initialized but no exporter is configured
+        otlp_verify_ssl: Enable TLS verification for otlp_endpoint. If True,
+                      `insecure` will eval to False
        sampling_rate: Sampling rate (0.0-1.0). Default 1.0 (100% sampling)

    Returns:
@@ -51,7 +56,7 @@ def setup_tracing(
    resource = Resource.create(
        {
            "service.name": service_name,
-            "service.version": "0.27.2",  # TODO: Extract from pyproject.toml
+            "service.version": version(__package__.split(".")[0]),
        }
    )

@@ -61,7 +66,9 @@ def setup_tracing(
    # Configure OTLP exporter if endpoint is provided
    if otlp_endpoint:
        try:
-            otlp_exporter = OTLPSpanExporter(endpoint=otlp_endpoint, insecure=True)
+            otlp_exporter = OTLPSpanExporter(
+                endpoint=otlp_endpoint, insecure=not otlp_verify_ssl
+            )
            span_processor = BatchSpanProcessor(otlp_exporter)
            provider.add_span_processor(span_processor)
            logger.info(
@@ -79,9 +86,6 @@ def setup_tracing(
    # Set global tracer provider
    trace.set_tracer_provider(provider)

-    # Auto-instrument httpx for Nextcloud API calls
-    HTTPXClientInstrumentor().instrument()
-
    # Auto-instrument logging to inject trace context
    LoggingInstrumentor().instrument(set_logging_format=True)

@@ -0,0 +1,20 @@
+"""Unified provider infrastructure for embeddings and text generation."""
+
+from .anthropic import AnthropicProvider
+from .base import Provider
+from .bedrock import BedrockProvider
+from .ollama import OllamaProvider
+from .openai import OpenAIProvider
+from .registry import get_provider, reset_provider
+from .simple import SimpleProvider
+
+# Names exported by `from <this package> import *`: the abstract Provider
+# base class, the concrete provider implementations imported above, and the
+# registry helpers get_provider / reset_provider.
+__all__ = [
+    "Provider",
+    "OllamaProvider",
+    "OpenAIProvider",
+    "AnthropicProvider",
+    "SimpleProvider",
+    "BedrockProvider",
+    "get_provider",
+    "reset_provider",
+]
@@ -0,0 +1,99 @@
+"""Unified Anthropic provider for text generation."""
+
+import logging
+
+from anthropic import AsyncAnthropic
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+
+class AnthropicProvider(Provider):
+    """
+    Anthropic provider for text generation.
+
+    Supports Claude models via the Anthropic API.
+    Note: Anthropic doesn't provide embedding models, only text generation.
+    """
+
+    def __init__(
+        self, api_key: str, generation_model: str = "claude-3-5-sonnet-20241022"
+    ):
+        """
+        Initialize Anthropic provider.
+
+        Args:
+            api_key: Anthropic API key
+            generation_model: Model name (e.g., "claude-3-5-sonnet-20241022")
+        """
+        self.client = AsyncAnthropic(api_key=api_key)
+        self.model = generation_model
+
+        logger.info(f"Initialized Anthropic provider (model={self.model})")
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return False
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return True
+
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        Raises:
+            NotImplementedError: Anthropic doesn't provide embedding models
+        """
+        raise NotImplementedError(
+            "Embedding not supported by Anthropic - use Ollama or Bedrock for embeddings"
+        )
+
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts.
+
+        Raises:
+            NotImplementedError: Anthropic doesn't provide embedding models
+        """
+        raise NotImplementedError(
+            "Embedding not supported by Anthropic - use Ollama or Bedrock for embeddings"
+        )
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Raises:
+            NotImplementedError: Anthropic doesn't provide embedding models
+        """
+        raise NotImplementedError(
+            "Embedding not supported by Anthropic - use Ollama or Bedrock for embeddings"
+        )
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text using Anthropic API.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text
+        """
+        message = await self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            temperature=0.7,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        return message.content[0].text
+
+    async def close(self) -> None:
+        """Close the client (no-op for Anthropic SDK)."""
+        pass
@@ -0,0 +1,91 @@
+"""Unified provider interface for embeddings and text generation."""
+
+from abc import ABC, abstractmethod
+
+
+class Provider(ABC):
+    """
+    Unified base class for LLM providers.
+
+    Providers can support embeddings, text generation, or both.
+    Use capability properties to determine what features are available.
+    """
+
+    @property
+    @abstractmethod
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        pass
+
+    @property
+    @abstractmethod
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        pass
+
+    @abstractmethod
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If provider doesn't support embeddings
+        """
+        pass
+
+    @abstractmethod
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts (optimized).
+
+        Args:
+            texts: List of texts to embed
+
+        Returns:
+            List of vector embeddings
+
+        Raises:
+            NotImplementedError: If provider doesn't support embeddings
+        """
+        pass
+
+    @abstractmethod
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension for this provider.
+
+        Returns:
+            Vector dimension (e.g., 768 for nomic-embed-text)
+
+        Raises:
+            NotImplementedError: If provider doesn't support embeddings
+        """
+        pass
+
+    @abstractmethod
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text
+
+        Raises:
+            NotImplementedError: If provider doesn't support generation
+        """
+        pass
+
+    @abstractmethod
+    async def close(self) -> None:
+        """Close the provider and release resources."""
+        pass
@@ -0,0 +1,397 @@
+"""Amazon Bedrock provider for embeddings and text generation."""
+
+import json
+import logging
+from typing import Any
+
+try:
+    import boto3
+    from botocore.exceptions import BotoCoreError, ClientError
+
+    BOTO3_AVAILABLE = True
+except ImportError:
+    BOTO3_AVAILABLE = False
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+
+class BedrockProvider(Provider):
+    """
+    Amazon Bedrock provider supporting both embeddings and text generation.
+
+    Uses AWS Bedrock Runtime API with boto3. Supports various model families:
+    - Embeddings: amazon.titan-embed-text-v1, amazon.titan-embed-text-v2, cohere.embed-*
+    - Text Generation: anthropic.claude-*, meta.llama3-*, amazon.titan-text-*, mistral.*, etc.
+
+    Requires AWS credentials configured via:
+    - Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
+    - AWS credentials file (~/.aws/credentials)
+    - IAM role (when running on AWS)
+    """
+
+    def __init__(
+        self,
+        region_name: str | None = None,
+        embedding_model: str | None = None,
+        generation_model: str | None = None,
+        aws_access_key_id: str | None = None,
+        aws_secret_access_key: str | None = None,
+    ):
+        """
+        Initialize Bedrock provider.
+
+        Args:
+            region_name: AWS region (e.g., "us-east-1"). Defaults to AWS_REGION env var.
+            embedding_model: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0").
+                None disables embeddings.
+            generation_model: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0").
+                None disables generation.
+            aws_access_key_id: AWS access key (optional, uses default credential chain if not provided)
+            aws_secret_access_key: AWS secret key (optional, uses default credential chain if not provided)
+
+        Raises:
+            ImportError: If boto3 is not installed
+        """
+        if not BOTO3_AVAILABLE:
+            raise ImportError(
+                "boto3 is required for Bedrock provider. Install with: pip install boto3"
+            )
+
+        self.embedding_model = embedding_model
+        self.generation_model = generation_model
+        self._dimension: int | None = None  # Detected dynamically
+
+        # Initialize bedrock-runtime client
+        client_kwargs: dict[str, Any] = {}
+        if region_name:
+            client_kwargs["region_name"] = region_name
+        if aws_access_key_id:
+            client_kwargs["aws_access_key_id"] = aws_access_key_id
+        if aws_secret_access_key:
+            client_kwargs["aws_secret_access_key"] = aws_secret_access_key
+
+        self.client = boto3.client("bedrock-runtime", **client_kwargs)
+
+        logger.info(
+            f"Initialized Bedrock provider in region {region_name or 'default'} "
+            f"(embedding_model={embedding_model}, generation_model={generation_model})"
+        )
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return self.embedding_model is not None
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return self.generation_model is not None
+
+    def _create_embedding_request(self, text: str) -> dict[str, Any]:
+        """
+        Create model-specific embedding request payload.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Request payload dict for the embedding model
+        """
+        if not self.embedding_model:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        # Titan Embed models
+        if self.embedding_model.startswith("amazon.titan-embed"):
+            return {"inputText": text}
+
+        # Cohere Embed models
+        elif self.embedding_model.startswith("cohere.embed"):
+            return {"texts": [text], "input_type": "search_document"}
+
+        # Unknown model - try Titan format as default
+        else:
+            logger.warning(
+                f"Unknown embedding model format for {self.embedding_model}, "
+                "using Titan format as default"
+            )
+            return {"inputText": text}
+
+    def _parse_embedding_response(self, response: dict[str, Any]) -> list[float]:
+        """
+        Parse model-specific embedding response.
+
+        Args:
+            response: Raw response from Bedrock
+
+        Returns:
+            Embedding vector as list of floats
+        """
+        # Titan Embed models
+        if self.embedding_model and self.embedding_model.startswith(
+            "amazon.titan-embed"
+        ):
+            return response["embedding"]
+
+        # Cohere Embed models
+        elif self.embedding_model and self.embedding_model.startswith("cohere.embed"):
+            return response["embeddings"][0]
+
+        # Unknown model - try Titan format as default
+        else:
+            logger.warning(
+                f"Unknown embedding response format for {self.embedding_model}, "
+                "trying Titan format"
+            )
+            return response.get("embedding", response.get("embeddings", [None])[0])
+
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            ClientError: If Bedrock API call fails
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        try:
+            request_body = self._create_embedding_request(text)
+
+            response = self.client.invoke_model(
+                modelId=self.embedding_model,
+                body=json.dumps(request_body),
+                accept="application/json",
+                contentType="application/json",
+            )
+
+            response_body = json.loads(response["body"].read())
+            embedding = self._parse_embedding_response(response_body)
+
+            return embedding
+
+        except (BotoCoreError, ClientError) as e:
+            logger.error(f"Bedrock embedding error: {e}")
+            raise
+
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts.
+
+        Note: Current implementation sends requests sequentially.
+        Future optimization could use asyncio for concurrent requests.
+
+        Args:
+            texts: List of texts to embed
+
+        Returns:
+            List of vector embeddings
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            ClientError: If Bedrock API call fails
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        embeddings = []
+        for text in texts:
+            embedding = await self.embed(text)
+            embeddings.append(embedding)
+        return embeddings
+
+    async def _detect_dimension(self):
+        """
+        Detect embedding dimension by generating a test embedding.
+        """
+        if self._dimension is None and self.supports_embeddings:
+            logger.debug(
+                f"Detecting embedding dimension for model {self.embedding_model}..."
+            )
+            test_embedding = await self.embed("test")
+            self._dimension = len(test_embedding)
+            logger.info(
+                f"Detected embedding dimension: {self._dimension} "
+                f"for model {self.embedding_model}"
+            )
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Returns:
+            Vector dimension for the configured embedding model
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            RuntimeError: If dimension not detected yet (call _detect_dimension first)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.embedding_model}. "
+                "Call _detect_dimension() first or generate an embedding."
+            )
+        return self._dimension
+
+    def _create_generation_request(
+        self, prompt: str, max_tokens: int
+    ) -> dict[str, Any]:
+        """
+        Create model-specific text generation request payload.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Request payload dict for the generation model
+        """
+        if not self.generation_model:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        # Anthropic Claude models
+        if self.generation_model.startswith("anthropic.claude"):
+            return {
+                "anthropic_version": "bedrock-2023-05-31",
+                "max_tokens": max_tokens,
+                "temperature": 0.7,
+                "messages": [{"role": "user", "content": prompt}],
+            }
+
+        # Meta Llama models
+        elif self.generation_model.startswith("meta.llama"):
+            return {"prompt": prompt, "max_gen_len": max_tokens, "temperature": 0.7}
+
+        # Amazon Titan Text models
+        elif self.generation_model.startswith("amazon.titan-text"):
+            return {
+                "inputText": prompt,
+                "textGenerationConfig": {
+                    "maxTokenCount": max_tokens,
+                    "temperature": 0.7,
+                },
+            }
+
+        # Mistral models
+        elif self.generation_model.startswith("mistral"):
+            return {"prompt": prompt, "max_tokens": max_tokens, "temperature": 0.7}
+
+        # Unknown model - try Claude format as default
+        else:
+            logger.warning(
+                f"Unknown generation model format for {self.generation_model}, "
+                "using Claude format as default"
+            )
+            return {
+                "anthropic_version": "bedrock-2023-05-31",
+                "max_tokens": max_tokens,
+                "temperature": 0.7,
+                "messages": [{"role": "user", "content": prompt}],
+            }
+
+    def _parse_generation_response(self, response: dict[str, Any]) -> str:
+        """
+        Parse model-specific text generation response.
+
+        Args:
+            response: Raw response from Bedrock
+
+        Returns:
+            Generated text
+        """
+        # Anthropic Claude models
+        if self.generation_model and self.generation_model.startswith(
+            "anthropic.claude"
+        ):
+            return response["content"][0]["text"]
+
+        # Meta Llama models
+        elif self.generation_model and self.generation_model.startswith("meta.llama"):
+            return response["generation"]
+
+        # Amazon Titan Text models
+        elif self.generation_model and self.generation_model.startswith(
+            "amazon.titan-text"
+        ):
+            return response["results"][0]["outputText"]
+
+        # Mistral models
+        elif self.generation_model and self.generation_model.startswith("mistral"):
+            return response["outputs"][0]["text"]
+
+        # Unknown model - try common response fields
+        else:
+            logger.warning(
+                f"Unknown generation response format for {self.generation_model}, "
+                "trying common fields"
+            )
+            # Try common response field names
+            for field in ["text", "generation", "outputText", "completion"]:
+                if field in response:
+                    return response[field]
+            # Last resort: return JSON string
+            return json.dumps(response)
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text
+
+        Raises:
+            NotImplementedError: If generation not enabled (no generation_model)
+            ClientError: If Bedrock API call fails
+        """
+        if not self.supports_generation:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        try:
+            request_body = self._create_generation_request(prompt, max_tokens)
+
+            response = self.client.invoke_model(
+                modelId=self.generation_model,
+                body=json.dumps(request_body),
+                accept="application/json",
+                contentType="application/json",
+            )
+
+            response_body = json.loads(response["body"].read())
+            text = self._parse_generation_response(response_body)
+
+            return text
+
+        except (BotoCoreError, ClientError) as e:
+            logger.error(f"Bedrock generation error: {e}")
+            raise
+
+    async def close(self) -> None:
+        """Close the client (no-op for boto3 clients)."""
+        pass
@@ -0,0 +1,234 @@
+"""Unified Ollama provider for embeddings and text generation."""
+
+import logging
+
+import httpx
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+
+class OllamaProvider(Provider):
+    """
+    Ollama provider supporting both embeddings and text generation.
+
+    Async requests (``embed``, ``embed_batch``, ``generate``) go through a
+    shared ``httpx.AsyncClient``. The model pre-check performed during
+    construction uses blocking ``httpx`` calls with the same SSL-verification
+    and timeout settings.
+
+    Supports TLS, SSL verification, and automatic model loading.
+    """
+
+    def __init__(
+        self,
+        base_url: str,
+        embedding_model: str | None = None,
+        generation_model: str | None = None,
+        verify_ssl: bool = True,
+        timeout: httpx.Timeout | None = None,
+    ):
+        """
+        Initialize Ollama provider.
+
+        Construction performs blocking HTTP requests: for every configured
+        model the server's model list is fetched and the model is pulled when
+        it is missing.
+
+        Args:
+            base_url: Ollama API base URL (e.g., https://ollama.internal.example.com:443)
+            embedding_model: Model for embeddings (e.g., "nomic-embed-text"). None disables embeddings.
+            generation_model: Model for text generation (e.g., "llama3.2:1b"). None disables generation.
+            verify_ssl: Verify SSL certificates (default: True)
+            timeout: HTTP timeout configuration (default: 120s overall, 5s connect)
+
+        Raises:
+            httpx.HTTPError: If the model pre-check or pull request fails
+        """
+        self.base_url = base_url.rstrip("/")
+        self.embedding_model = embedding_model
+        self.generation_model = generation_model
+        self.verify_ssl = verify_ssl
+
+        if timeout is None:
+            timeout = httpx.Timeout(timeout=120, connect=5)
+
+        self.client = httpx.AsyncClient(verify=verify_ssl, timeout=timeout)
+        self._dimension: int | None = None  # Detected dynamically for embeddings
+
+        logger.info(
+            f"Initialized Ollama provider: {base_url} "
+            f"(embedding_model={embedding_model}, generation_model={generation_model}, "
+            f"verify_ssl={verify_ssl})"
+        )
+
+        # Pre-check and auto-load models
+        if embedding_model:
+            self._check_model_is_loaded(embedding_model, autoload=True)
+        if generation_model:
+            self._check_model_is_loaded(generation_model, autoload=True)
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return self.embedding_model is not None
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return self.generation_model is not None
+
+    def _remember_dimension(self, embedding: list[float]) -> None:
+        """Cache the embedding dimension the first time a vector is seen."""
+        if self._dimension is None:
+            self._dimension = len(embedding)
+            logger.info(
+                f"Detected embedding dimension: {self._dimension} "
+                f"for model {self.embedding_model}"
+            )
+
+    @staticmethod
+    def _normalize_model_name(model: str) -> str:
+        """
+        Return ``model`` with an explicit tag, appending ``:latest`` if absent.
+
+        Only the last path segment is inspected so that a registry host with a
+        port (``host:5000/ns/model``) is not mistaken for a tagged name.
+        """
+        last_segment = model.rsplit("/", 1)[-1]
+        return model if ":" in last_segment else f"{model}:latest"
+
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        The vector length is cached on first use so that ``get_dimension()``
+        works after any successful embedding call.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            httpx.HTTPStatusError: If Ollama answers with an error status
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        response = await self.client.post(
+            f"{self.base_url}/api/embeddings",
+            json={"model": self.embedding_model, "prompt": text},
+        )
+        response.raise_for_status()
+        embedding = response.json()["embedding"]
+        self._remember_dimension(embedding)
+        return embedding
+
+    async def embed_batch(
+        self, texts: list[str], batch_size: int = 32
+    ) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts using Ollama's batch API.
+
+        Uses /api/embed endpoint with array input for efficient batch processing.
+        Conservative batch size (32) prevents quality degradation observed in
+        Ollama issue #6262 with larger batches.
+
+        Note: Ollama processes batches serially, not in parallel.
+
+        Args:
+            texts: List of texts to embed
+            batch_size: Maximum texts per batch (default: 32); must be >= 1
+
+        Returns:
+            List of vector embeddings, in the same order as ``texts``
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            ValueError: If batch_size is smaller than 1
+            httpx.HTTPStatusError: If Ollama answers with an error status
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        # A negative step would make range() empty and silently drop all input.
+        if batch_size < 1:
+            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+
+        all_embeddings: list[list[float]] = []
+        for i in range(0, len(texts), batch_size):
+            batch = texts[i : i + batch_size]
+            response = await self.client.post(
+                f"{self.base_url}/api/embed",
+                json={"model": self.embedding_model, "input": batch},
+            )
+            response.raise_for_status()
+            batch_embeddings = response.json()["embeddings"]
+            if batch_embeddings:
+                self._remember_dimension(batch_embeddings[0])
+            all_embeddings.extend(batch_embeddings)
+
+        return all_embeddings
+
+    async def _detect_dimension(self):
+        """
+        Detect embedding dimension by generating a test embedding.
+
+        This method queries the model to determine the actual dimension
+        instead of relying on hardcoded values. It does nothing when the
+        dimension is already known or embeddings are disabled.
+        """
+        if self._dimension is None and self.supports_embeddings:
+            logger.debug(
+                f"Detecting embedding dimension for model {self.embedding_model}..."
+            )
+            test_embedding = await self.embed("test")
+            # Idempotent: only records (and logs) if embed() has not already.
+            self._remember_dimension(test_embedding)
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Returns:
+            Vector dimension for the configured embedding model
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            RuntimeError: If dimension not detected yet (call _detect_dimension
+                first or generate an embedding)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.embedding_model}. "
+                "Call _detect_dimension() first or generate an embedding."
+            )
+        return self._dimension
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        Sends a non-streaming request to /api/generate with a fixed
+        temperature of 0.7.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate (sent as ``num_predict``)
+
+        Returns:
+            Generated text
+
+        Raises:
+            NotImplementedError: If generation not enabled (no generation_model)
+            httpx.HTTPStatusError: If Ollama answers with an error status
+        """
+        if not self.supports_generation:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        response = await self.client.post(
+            f"{self.base_url}/api/generate",
+            json={
+                "model": self.generation_model,
+                "prompt": prompt,
+                "stream": False,
+                "options": {
+                    "num_predict": max_tokens,
+                    "temperature": 0.7,
+                },
+            },
+        )
+        response.raise_for_status()
+        data = response.json()
+        return data["response"]
+
+    def _check_model_is_loaded(self, model: str, autoload: bool = True):
+        """
+        Check if model is loaded in Ollama, optionally auto-loading it.
+
+        Uses blocking requests that honour the provider's ``verify_ssl`` and
+        timeout settings. Model names are compared with an explicit tag, so a
+        configured ``nomic-embed-text`` matches ``nomic-embed-text:latest``
+        in the server's list and is not pulled again on every start.
+
+        Args:
+            model: Model name to check
+            autoload: Whether to automatically pull the model if not loaded
+
+        Raises:
+            httpx.HTTPError: If listing or pulling models fails
+        """
+        # Reuse the async client's settings for the one-off blocking calls.
+        request_options = {"verify": self.verify_ssl, "timeout": self.client.timeout}
+
+        response = httpx.get(f"{self.base_url}/api/tags", **request_options)
+        response.raise_for_status()
+
+        models = [m["name"] for m in response.json().get("models", [])]
+        logger.info("Ollama has following models pre-loaded: %s", models)
+
+        available = {self._normalize_model_name(name) for name in models}
+        if autoload and self._normalize_model_name(model) not in available:
+            logger.warning(
+                "Model '%s' not yet available in ollama, attempting to pull now...",
+                model,
+            )
+            response = httpx.post(
+                f"{self.base_url}/api/pull",
+                json={"model": model},
+                **request_options,
+            )
+            response.raise_for_status()
+
+    async def close(self) -> None:
+        """Close HTTP client."""
+        await self.client.aclose()
@@ -0,0 +1,271 @@
+"""Unified OpenAI provider for embeddings and text generation.
+
+Supports:
+- OpenAI's standard API
+- GitHub Models API (models.github.ai)
+- Any OpenAI-compatible API via base_url override
+"""
+
+import logging
+from functools import wraps
+
+import anyio
+from openai import AsyncOpenAI, RateLimitError
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+# Rate limit retry configuration (consumed by retry_on_rate_limit below)
+MAX_RETRIES = 5  # total attempts per call, including the first one
+INITIAL_RETRY_DELAY = 2.0  # seconds
+MAX_RETRY_DELAY = 60.0  # seconds; upper bound for the doubling backoff delay
+
+
+def retry_on_rate_limit(func):
+    """
+    Wrap an async callable so OpenAI rate-limit errors are retried.
+
+    The wrapped coroutine is attempted up to MAX_RETRIES times. After each
+    RateLimitError except the last, the wrapper sleeps and doubles the delay
+    (starting at INITIAL_RETRY_DELAY, capped at MAX_RETRY_DELAY). Any other
+    exception propagates immediately. Once attempts are exhausted the last
+    RateLimitError is re-raised.
+    """
+
+    @wraps(func)
+    async def wrapper(*args, **kwargs):
+        retry_delay = INITIAL_RETRY_DELAY
+        failure: Exception | None = None
+        attempt = 0
+
+        while attempt < MAX_RETRIES:
+            attempt += 1
+            try:
+                return await func(*args, **kwargs)
+            except RateLimitError as exc:
+                failure = exc
+                if attempt >= MAX_RETRIES:
+                    # Final attempt failed: skip the sleep and report below.
+                    break
+                logger.warning(
+                    f"Rate limit hit (attempt {attempt}/{MAX_RETRIES}), "
+                    f"retrying in {retry_delay:.1f}s..."
+                )
+                await anyio.sleep(retry_delay)
+                retry_delay = min(retry_delay * 2, MAX_RETRY_DELAY)
+
+        logger.error(f"Rate limit exceeded after {MAX_RETRIES} attempts")
+        raise failure  # type: ignore[misc]
+
+    return wrapper
+
+
+# Well-known embedding dimensions for OpenAI models.
+# OpenAIProvider looks the configured model up here at construction time so the
+# dimension is known without an API call; models missing from this table get
+# their dimension from the first embedding response instead.
+OPENAI_EMBEDDING_DIMENSIONS: dict[str, int] = {
+    "text-embedding-3-small": 1536,
+    "text-embedding-3-large": 3072,
+    "text-embedding-ada-002": 1536,
+    # GitHub Models API uses openai/ prefix
+    "openai/text-embedding-3-small": 1536,
+    "openai/text-embedding-3-large": 3072,
+}
+
+
+class OpenAIProvider(Provider):
+    """
+    OpenAI provider supporting both embeddings and text generation.
+
+    Works with:
+    - OpenAI's standard API (api.openai.com)
+    - GitHub Models API (models.github.ai)
+    - Any OpenAI-compatible API (via base_url)
+
+    Every outgoing API request is wrapped in ``retry_on_rate_limit``.
+    """
+
+    def __init__(
+        self,
+        api_key: str,
+        base_url: str | None = None,
+        embedding_model: str | None = None,
+        generation_model: str | None = None,
+        timeout: float = 120.0,
+    ):
+        """
+        Initialize OpenAI provider.
+
+        Args:
+            api_key: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
+            base_url: Base URL override (e.g., "https://models.github.ai/inference")
+            embedding_model: Model for embeddings (e.g., "text-embedding-3-small").
+                            None disables embeddings.
+            generation_model: Model for text generation (e.g., "gpt-4o-mini").
+                             None disables generation.
+            timeout: HTTP timeout in seconds (default: 120)
+        """
+        self.embedding_model = embedding_model
+        self.generation_model = generation_model
+        self._dimension: int | None = None
+
+        # Initialize async client
+        self.client = AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url,
+            timeout=timeout,
+        )
+
+        # Try to get known dimension without API call
+        if embedding_model and embedding_model in OPENAI_EMBEDDING_DIMENSIONS:
+            self._dimension = OPENAI_EMBEDDING_DIMENSIONS[embedding_model]
+
+        logger.info(
+            f"Initialized OpenAI provider: base_url={base_url or 'default'} "
+            f"(embedding_model={embedding_model}, generation_model={generation_model}, "
+            f"dimension={self._dimension})"
+        )
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return self.embedding_model is not None
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return self.generation_model is not None
+
+    def _remember_dimension(self, embedding: list[float]) -> None:
+        """Cache the embedding dimension the first time it becomes known."""
+        if self._dimension is None:
+            self._dimension = len(embedding)
+            logger.info(
+                f"Detected embedding dimension: {self._dimension} "
+                f"for model {self.embedding_model}"
+            )
+
+    @retry_on_rate_limit
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            RateLimitError: If the API is still rate limited after all retries
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        assert self.embedding_model is not None  # Type narrowing
+        response = await self.client.embeddings.create(
+            input=text,
+            model=self.embedding_model,
+        )
+
+        embedding = response.data[0].embedding
+
+        # Update dimension if not set
+        self._remember_dimension(embedding)
+
+        return embedding
+
+    async def embed_batch(
+        self, texts: list[str], batch_size: int = 100
+    ) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts using OpenAI's batch API.
+
+        OpenAI supports up to 2048 inputs per request; the default batch size
+        stays well below that for safety. Each batch is sent as a separate
+        request that is retried independently on rate limits.
+
+        Args:
+            texts: List of texts to embed
+            batch_size: Maximum texts per request (default: 100); must be >= 1
+
+        Returns:
+            List of vector embeddings, in the same order as ``texts``
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            ValueError: If batch_size is smaller than 1
+            RateLimitError: If the API is still rate limited after all retries
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        # A non-positive step would raise an opaque error or drop all input.
+        if batch_size < 1:
+            raise ValueError(f"batch_size must be >= 1, got {batch_size}")
+
+        if not texts:
+            return []
+
+        all_embeddings: list[list[float]] = []
+
+        for i in range(0, len(texts), batch_size):
+            batch = texts[i : i + batch_size]
+
+            # Use helper method with retry logic for each batch
+            batch_embeddings = await self._embed_batch_request(batch)
+            all_embeddings.extend(batch_embeddings)
+
+            # Update dimension if not set
+            if batch_embeddings:
+                self._remember_dimension(batch_embeddings[0])
+
+        return all_embeddings
+
+    @retry_on_rate_limit
+    async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]:
+        """
+        Make a single batch embedding request with retry logic.
+
+        Args:
+            batch: Texts to embed in one API request
+
+        Returns:
+            Embeddings ordered to match ``batch``
+        """
+        assert self.embedding_model is not None  # Type narrowing
+        response = await self.client.embeddings.create(
+            input=batch,
+            model=self.embedding_model,
+        )
+        # Sort by index to maintain order
+        sorted_data = sorted(response.data, key=lambda x: x.index)
+        return [item.embedding for item in sorted_data]
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Returns:
+            Vector dimension for the configured embedding model
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            RuntimeError: If dimension not detected yet (call embed first)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.embedding_model}. "
+                "Call embed() first or use a known model."
+            )
+        return self._dimension
+
+    @retry_on_rate_limit
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        The prompt is sent as a single user message to the chat completions
+        endpoint with a fixed temperature of 0.7.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text (empty string if the model returned no content)
+
+        Raises:
+            NotImplementedError: If generation not enabled (no generation_model)
+            RateLimitError: If the API is still rate limited after all retries
+        """
+        if not self.supports_generation:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        assert self.generation_model is not None  # Type narrowing
+        response = await self.client.chat.completions.create(
+            model=self.generation_model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=max_tokens,
+            temperature=0.7,
+        )
+
+        return response.choices[0].message.content or ""
+
+    async def close(self) -> None:
+        """Close HTTP client."""
+        await self.client.close()
@@ -0,0 +1,156 @@
+"""Provider registry and factory for auto-detection and instantiation."""
+
+import logging
+import os
+
+from .base import Provider
+from .bedrock import BedrockProvider
+from .ollama import OllamaProvider
+from .openai import OpenAIProvider
+from .simple import SimpleProvider
+
+logger = logging.getLogger(__name__)
+
+
+class ProviderRegistry:
+    """
+    Registry for provider auto-detection and instantiation.
+
+    Checks environment variables in priority order and creates appropriate provider:
+    1. Bedrock (AWS_REGION or any BEDROCK_*_MODEL)
+    2. OpenAI (OPENAI_API_KEY)
+    3. Ollama (OLLAMA_BASE_URL)
+    4. Simple (fallback for testing/development)
+    """
+
+    # Accepted spellings for boolean environment variables (case-insensitive).
+    _TRUE_VALUES = frozenset({"true", "1", "yes", "on"})
+    _FALSE_VALUES = frozenset({"false", "0", "no", "off"})
+
+    @staticmethod
+    def _env_flag(name: str, default: bool) -> bool:
+        """
+        Read a boolean environment variable.
+
+        Surrounding whitespace and letter case are ignored. Unset or empty
+        variables yield ``default``; unrecognized values also yield ``default``
+        and emit a warning, so a typo can never silently flip the setting.
+
+        Args:
+            name: Environment variable name
+            default: Value used when the variable is unset, empty or invalid
+
+        Returns:
+            Parsed boolean value
+        """
+        raw = os.getenv(name)
+        if raw is None or not raw.strip():
+            return default
+
+        value = raw.strip().lower()
+        if value in ProviderRegistry._TRUE_VALUES:
+            return True
+        if value in ProviderRegistry._FALSE_VALUES:
+            return False
+
+        logger.warning(
+            "Unrecognized boolean value %r for %s, using default %s",
+            raw,
+            name,
+            default,
+        )
+        return default
+
+    @staticmethod
+    def create_provider() -> Provider:
+        """
+        Auto-detect and create provider based on environment variables.
+
+        Priority order:
+        1. Bedrock - if AWS_REGION, BEDROCK_EMBEDDING_MODEL or
+           BEDROCK_GENERATION_MODEL is set
+        2. OpenAI - if OPENAI_API_KEY is set
+        3. Ollama - if OLLAMA_BASE_URL is set
+        4. Simple - fallback for testing/development
+
+        Returns:
+            Provider instance
+
+        Raises:
+            ValueError: If SIMPLE_EMBEDDING_DIMENSION is set but not an integer
+
+        Environment Variables:
+            Bedrock:
+                - AWS_REGION: AWS region (e.g., "us-east-1")
+                - AWS_ACCESS_KEY_ID: AWS access key (optional, uses credential chain)
+                - AWS_SECRET_ACCESS_KEY: AWS secret key (optional)
+                - BEDROCK_EMBEDDING_MODEL: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
+                - BEDROCK_GENERATION_MODEL: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+
+            OpenAI:
+                - OPENAI_API_KEY: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
+                - OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
+                - OPENAI_EMBEDDING_MODEL: Model for embeddings (default: "text-embedding-3-small")
+                - OPENAI_GENERATION_MODEL: Model for text generation (e.g., "gpt-4o-mini")
+
+            Ollama:
+                - OLLAMA_BASE_URL: Ollama API base URL (e.g., "http://localhost:11434")
+                - OLLAMA_EMBEDDING_MODEL: Model for embeddings (default: "nomic-embed-text")
+                - OLLAMA_GENERATION_MODEL: Model for text generation (e.g., "llama3.2:1b")
+                - OLLAMA_VERIFY_SSL: Verify SSL certificates (default: "true";
+                  accepts true/false, 1/0, yes/no, on/off)
+
+            Simple (no configuration needed, fallback):
+                - SIMPLE_EMBEDDING_DIMENSION: Embedding dimension (default: 384)
+        """
+        # 1. Check for Bedrock
+        aws_region = os.getenv("AWS_REGION")
+        bedrock_embedding_model = os.getenv("BEDROCK_EMBEDDING_MODEL")
+        bedrock_generation_model = os.getenv("BEDROCK_GENERATION_MODEL")
+
+        if aws_region or bedrock_embedding_model or bedrock_generation_model:
+            logger.info(
+                f"Using Bedrock provider: region={aws_region}, "
+                f"embedding_model={bedrock_embedding_model}, "
+                f"generation_model={bedrock_generation_model}"
+            )
+            return BedrockProvider(
+                region_name=aws_region,
+                embedding_model=bedrock_embedding_model,
+                generation_model=bedrock_generation_model,
+                aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+                aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+            )
+
+        # 2. Check for OpenAI
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+        if openai_api_key:
+            base_url = os.getenv("OPENAI_BASE_URL")
+            embedding_model = os.getenv(
+                "OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
+            )
+            generation_model = os.getenv("OPENAI_GENERATION_MODEL")
+
+            logger.info(
+                f"Using OpenAI provider: base_url={base_url or 'default'}, "
+                f"embedding_model={embedding_model}, "
+                f"generation_model={generation_model}"
+            )
+            return OpenAIProvider(
+                api_key=openai_api_key,
+                base_url=base_url,
+                embedding_model=embedding_model,
+                generation_model=generation_model,
+            )
+
+        # 3. Check for Ollama (local LLM)
+        ollama_url = os.getenv("OLLAMA_BASE_URL")
+        if ollama_url:
+            embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
+            generation_model = os.getenv("OLLAMA_GENERATION_MODEL")
+            # Verification stays on unless it is explicitly switched off.
+            verify_ssl = ProviderRegistry._env_flag("OLLAMA_VERIFY_SSL", default=True)
+
+            logger.info(
+                f"Using Ollama provider: {ollama_url}, "
+                f"embedding_model={embedding_model}, "
+                f"generation_model={generation_model}"
+            )
+            return OllamaProvider(
+                base_url=ollama_url,
+                embedding_model=embedding_model,
+                generation_model=generation_model,
+                verify_ssl=verify_ssl,
+            )
+
+        # 4. Fallback to Simple provider for development/testing
+        dimension = int(os.getenv("SIMPLE_EMBEDDING_DIMENSION", "384"))
+        logger.warning(
+            "No provider configured (AWS_REGION, OPENAI_API_KEY, OLLAMA_BASE_URL not set). "
+            "Using SimpleProvider for testing/development. "
+            "For production, configure Bedrock, OpenAI, or Ollama."
+        )
+        return SimpleProvider(dimension=dimension)
+
+
+# Singleton instance: None until get_provider() creates it; reset_provider()
+# sets it back to None.
+_provider: Provider | None = None
+
+
+def get_provider() -> Provider:
+    """
+    Return the process-wide provider, creating it on first use.
+
+    The first call runs ``ProviderRegistry.create_provider()`` and stores the
+    result in the module-level ``_provider``; later calls return that same
+    object.
+
+    Returns:
+        Global Provider instance (auto-detected on first call)
+    """
+    global _provider
+    if _provider is not None:
+        return _provider
+
+    _provider = ProviderRegistry.create_provider()
+    return _provider
+
+
+def reset_provider():
+    """
+    Forget the cached provider so the next ``get_provider()`` call re-detects it.
+
+    Only the module-level reference is cleared; the previous provider object
+    is not closed here. Useful for testing or reconfiguration.
+    """
+    global _provider
+    _provider = None
--- a/Show More
+++ b/Show More