Update README

bump: version 0.48.1 → 0.48.2
Merge pull request #350 from cbcoutinho/feature/openai-provider-support
2025-11-23 03:27:20 +00:00 · 2025-11-23 03:25:23 +00:00 · 2025-11-23 04:24:55 +01:00 · 2025-11-23 04:23:50 +01:00 · 2025-11-23 04:20:47 +01:00 · 2025-11-23 04:20:09 +01:00
116 changed files with 20287 additions and 1321 deletions
@@ -5,3 +5,5 @@
 !uv.lock

 !nextcloud_mcp_server/**/*.py
+!nextcloud_mcp_server/**/*.html
+!nextcloud_mcp_server/auth/static/*
@@ -15,12 +15,12 @@ jobs:
      packages: write
    steps:
      - name: Check out
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
        with:
          fetch-depth: 0
          token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
      - name: Create bump and changelog
-        uses: commitizen-tools/commitizen-action@5b0848cd060263e24602d1eba03710e056ef7711 # 0.24.0
+        uses: commitizen-tools/commitizen-action@bb4f1df6601e2a1a891506581b0c53acdc88e07d # 0.26.0
        with:
          github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
          changelog_increment_filename: body.md
@@ -12,7 +12,7 @@ jobs:
      packages: write
    steps:
      - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6

      - name: Docker meta
        id: meta
@@ -14,7 +14,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
        with:
          fetch-depth: 0

@@ -0,0 +1,113 @@
+name: RAG Evaluation
+
+on:
+  workflow_dispatch:
+    inputs:
+      manual_path:
+        description: 'Path to Nextcloud User Manual PDF in Nextcloud'
+        required: false
+        default: 'Nextcloud Manual.pdf'
+      embedding_model:
+        description: 'OpenAI embedding model'
+        required: false
+        default: 'openai/text-embedding-3-small'
+      generation_model:
+        description: 'OpenAI generation model'
+        required: false
+        default: 'openai/gpt-4o-mini'
+
+jobs:
+  rag-evaluation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      # Declaring any permission sets every unlisted scope to "none", so the
+      # read access needed by actions/checkout has to be granted explicitly.
+      contents: read
+      # Lets GITHUB_TOKEN authenticate against the GitHub Models inference API
+      # that this job uses as its OpenAI-compatible endpoint.
+      models: read
+
+    steps:
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
+        with:
+          submodules: 'true'
+
+      ###### Required to build OIDC App ######
+      - name: Set up php 8.4
+        uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
+        with:
+          php-version: 8.4
+          coverage: none
+
+      - name: Install OIDC app composer dependencies
+        run: |
+          cd third_party/oidc
+          composer install --no-dev
+      ###### Required to build OIDC App ######
+
+      - name: Run docker compose with vector sync
+        uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
+        with:
+          compose-file: "./docker-compose.yml"
+          up-flags: "--build"
+        env:
+          # Override MCP container environment for OpenAI + vector sync
+          VECTOR_SYNC_ENABLED: "true"
+          VECTOR_SYNC_SCAN_INTERVAL: "5"
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+
+      - name: Install the latest version of uv
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
+
+      - name: Wait for Nextcloud to be ready
+        run: |
+          echo "Waiting for Nextcloud..."
+          max_attempts=60
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "Service did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
+            sleep 5
+          done
+          echo "Nextcloud is ready."
+
+      - name: Wait for MCP server to be ready
+        run: |
+          echo "Waiting for MCP server..."
+          max_attempts=30
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8000/health/live | grep -q "200"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "MCP server did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: MCP not ready, sleeping for 2 seconds..."
+            sleep 2
+          done
+          echo "MCP server is ready."
+
+      - name: Run RAG evaluation tests
+        env:
+          NEXTCLOUD_HOST: "http://localhost:8080"
+          NEXTCLOUD_USERNAME: "admin"
+          NEXTCLOUD_PASSWORD: "admin"
+          RAG_MANUAL_PATH: ${{ inputs.manual_path }}
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+        run: |
+          # Write a JUnit report; the "Upload test results" step publishes
+          # pytest-results.xml and would otherwise find no file to upload.
+          uv run pytest tests/integration/test_rag_openai.py -v --log-cli-level=INFO --junitxml=pytest-results.xml
+
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: rag-evaluation-results
+          path: |
+            pytest-results.xml
+          retention-days: 30
@@ -18,9 +18,9 @@ jobs:
      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
      - name: Install uv
-        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
      - name: Install Python 3.11
        run: uv python install 3.11
      - name: Build
@@ -9,9 +9,9 @@ jobs:
  linting:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
      - name: Check format
        run: |
          uv run --frozen ruff format --diff
@@ -27,7 +27,7 @@ jobs:
    runs-on: ubuntu-latest

    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        with:
          submodules: 'true'

@@ -56,7 +56,7 @@ jobs:
          up-flags: "--build"

      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4

      - name: Install Playwright dependencies
        run: |
@@ -85,4 +85,4 @@ jobs:
          NEXTCLOUD_USERNAME: "admin"
          NEXTCLOUD_PASSWORD: "admin"
        run: |
-          uv run pytest -v --log-cli-level=WARN --ignore=tests/manual
+          # -m accepts a single mark expression; repeating the flag keeps only
+          # the last value, so combine both markers with "or".
+          uv run pytest -v --log-cli-level=WARN -m "unit or smoke"
@@ -13,3 +13,6 @@ docker-compose.override.yml
 # Generated by pytest used to login users
 .nextcloud_oauth_*.json
 .playwright-mcp/
+
+# RAG Evaluation
+tests/rag_evaluation/fixtures/
@@ -1,6 +1,6 @@
-[submodule "oidc"]
-	path = third_party/oidc
-	url = https://github.com/cbcoutinho/oidc
 [submodule "third_party/oidc"]
 	path = third_party/oidc
 	url = https://github.com/cbcoutinho/oidc
+[submodule "third_party/notes"]
+	path = third_party/notes
+	url = https://github.com/cbcoutinho/notes
@@ -1,3 +1,242 @@
+## v0.48.2 (2025-11-23)
+
+### Fix
+
+- Share vector sync state with FastMCP session lifespan via module singleton
+- Share vector sync state with FastMCP session lifespan via module singleton
+
+## v0.48.1 (2025-11-23)
+
+### Fix
+
+- Use WebDAV for tag creation and add LLM-as-a-judge for RAG tests
+
+### Refactor
+
+- Move background tasks to server lifespan and deprecate SSE transport
+
+## v0.48.0 (2025-11-23)
+
+### Feat
+
+- Add tag management methods to WebDAV client
+
+## v0.47.0 (2025-11-23)
+
+### Feat
+
+- Add OpenAI provider support for embeddings and generation
+
+## v0.46.2 (2025-11-22)
+
+### Fix
+
+- **smithery**: Enable JSON response format for scanner compatibility
+
+## v0.46.1 (2025-11-22)
+
+### Perf
+
+- Optimize vector viz search performance
+
+## v0.46.0 (2025-11-22)
+
+### Feat
+
+- Add Smithery CLI deployment support
+- Implement ADR-016 Smithery stateless deployment mode
+
+### Fix
+
+- **smithery**: Add JSON Schema metadata to mcp-config endpoint
+- **smithery**: Use container runtime pattern for config discovery
+- Add Smithery lifespan and auth mode detection
+
+## v0.45.0 (2025-11-22)
+
+### Feat
+
+- Add context expansion to semantic search with chunk overlap removal
+- Use Ollama native batch API in embed_batch()
+- Implement Qdrant placeholder state management
+- Switch files to use numeric IDs with file_path resolution
+- Implement per-chunk vector visualization with context expansion
+
+### Fix
+
+- Use alpha_composite for proper RGBA highlight blending
+- Remove pymupdf.layout.activate() to fix page_chunks behavior
+- Centralize PDF processing and generate separate images per chunk
+- Set is_placeholder=False in processor to fix search filtering
+- Increase placeholder staleness threshold to 5x scan interval
+- Add placeholder staleness check to prevent duplicate processing
+- Use empty SparseVector instead of None for placeholders
+- Return empty array instead of null for query_coords when no results
+- Align PDF text extraction between indexing and context expansion
+- Update models and viz to use int-only doc_id
+- Reconstruct full content for notes to match indexed offsets
+- Add async/await, PDF metadata, and type safety fixes
+
+### Refactor
+
+- Simplify PDF text extraction with single to_markdown call
+
+### Perf
+
+- Optimize PDF processing with parallel extraction and single-render highlights
+
+## v0.44.1 (2025-11-21)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.22,<1.23
+
+## v0.44.0 (2025-11-19)
+
+### Feat
+
+- Improve vector visualization with static assets and fixes
+- Redesign UI to match Nextcloud ecosystem aesthetic
+
+### Fix
+
+- Improve 3D plot rendering with explicit dimensions and window resize support
+- Preserve 3D plot camera and improve documentation
+- Preserve 3D plot camera position and fix CSS loading
+
+## v0.43.0 (2025-11-18)
+
+### Feat
+
+- Replace custom document chunker with LangChain MarkdownTextSplitter
+
+## v0.42.0 (2025-11-17)
+
+### Feat
+
+- **viz**: Add dual-score display and improve UI controls
+
+## v0.41.0 (2025-11-17)
+
+### Feat
+
+- add configurable fusion algorithms for BM25 hybrid search
+- add chunk position tracking to vector indexing and search
+- add vector viz template and chunk context endpoint
+
+### Fix
+
+- prevent infinite loop in DocumentChunker with position tracking
+- Relax SearchResult validation to support DBSF fusion scores > 1.0
+
+## v0.40.0 (2025-11-16)
+
+### Feat
+
+- add unified provider architecture with Amazon Bedrock support
+
+### Fix
+
+- suppress Starlette middleware type warnings in ty checker
+
+## v0.39.0 (2025-11-16)
+
+### Feat
+
+- Implement BM25 hybrid search with native Qdrant RRF fusion
+
+### Fix
+
+- Handle named vectors in visualization and semantic search
+- Update vizApp to use bm25_hybrid algorithm and remove deprecated weights
+- Update viz routes to use BM25 hybrid search after refactor
+
+## v0.38.0 (2025-11-16)
+
+### Feat
+
+- add concurrent uploads and --force flag to upload command
+- implement RAG evaluation framework with CLI tooling
+
+### Fix
+
+- download qrels from BEIR ZIP instead of HuggingFace
+
+### Refactor
+
+- migrate asyncio to anyio for consistent structured concurrency
+- replace httpx client with NextcloudClient in upload command
+
+### Perf
+
+- Eliminate double-fetching in semantic search sampling
+- fix vector viz search performance and visual encoding
+- make note deletion concurrent in upload --force
+
+## v0.37.0 (2025-11-16)
+
+### Feat
+
+- Add OpenTelemetry tracing to @instrument_tool decorator
+
+## v0.36.0 (2025-11-15)
+
+### BREAKING CHANGE
+
+- Search algorithms now require Qdrant to be populated.
+Vector sync must be enabled and documents indexed for search to work.
+
+### Feat
+
+- Normalize hybrid search RRF scores to 0-1 range
+- Enhance vector visualization UI and parallelize search verification
+- Add Vector Viz tab to app home page
+- Add vector visualization pane with multi-select document types
+- Implement custom PCA to remove sklearn dependency
+- Add multi-document Protocol with cross-app search support
+- Update nc_semantic_search tool with algorithm selection
+- Implement unified search algorithm module
+
+### Fix
+
+- Reorder tabs and fix viz pane session access
+
+### Refactor
+
+- Optimize Nextcloud access verification with centralized filtering
+- Make all search algorithms query Qdrant payload, not Nextcloud
+
+### Perf
+
+- Exclude vector-sync status polling from distributed tracing
+
+## v0.35.0 (2025-11-15)
+
+### Feat
+
+- Enable SSE transport for mcp service and update test fixtures
+
+## v0.34.2 (2025-11-13)
+
+### Fix
+
+- Use NEXTCLOUD_OIDC_CLIENT_ID/SECRET env vars consistently
+
+## v0.34.1 (2025-11-13)
+
+### Fix
+
+- return all notes when search query is empty
+
+## v0.34.0 (2025-11-13)
+
+### Feat
+
+- Complete Phase 5 - Instrument all 93 MCP tools
+- Add instrumentation decorator and apply to notes tools (Phase 5)
+- Add OAuth token and database metrics (Phases 3-4)
+- Add metrics instrumentation for queue, health, and database operations
+
 ## v0.33.1 (2025-11-13)

 ### Fix
@@ -5,23 +5,29 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 ## Coding Conventions

 ### async/await Patterns
- **Use anyio + asyncio hybrid** - Both libraries are available
+- **Use anyio for all async operations** - Provides structured concurrency
  - pytest runs in `anyio` mode (`anyio_mode = "auto"` in pyproject.toml)
-  - asyncio used in auth modules (refresh_token_storage.py, token_exchange.py, token_broker.py)
-  - anyio used in calendar.py, client_registration.py, app.py
+  - Use `anyio.create_task_group()` for concurrent execution (NOT `asyncio.gather()`)
+  - Use `anyio.Lock()` for synchronization primitives (NOT `asyncio.Lock()`)
+  - Use `anyio.run()` for entry points (NOT `asyncio.run()`)
  - Prefer standard async/await syntax without explicit library imports when possible
+  - Examples: app.py, search/hybrid.py, search/verification.py, auth/token_broker.py

 ### Type Hints
 - **Use Python 3.10+ union syntax**: `str | None` instead of `Optional[str]`
 - **Use lowercase generics**: `dict[str, Any]` instead of `Dict[str, Any]`
 - **Type all function signatures** - Parameters and return types
- **No explicit type checker configured** - Ruff handles linting only
+- **Type checker**: `ty` is configured for static type checking
+  ```bash
+  uv run ty check -- nextcloud_mcp_server
+  ```

 ### Code Quality
- **Run ruff before committing**:
+- **Run ruff and ty before committing**:
  ```bash
  uv run ruff check
  uv run ruff format
+  uv run ty check -- nextcloud_mcp_server
  ```
 - **Ruff configuration** in pyproject.toml (extends select: ["I"] for import sorting)

@@ -55,8 +61,60 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 - `nextcloud_mcp_server/server/` - MCP tool/resource definitions
 - `nextcloud_mcp_server/auth/` - OAuth/OIDC authentication
 - `nextcloud_mcp_server/models/` - Pydantic response models
+- `nextcloud_mcp_server/providers/` - Unified LLM provider infrastructure (embeddings + generation)
 - `tests/` - Layered test suite (unit, smoke, integration, load)

+### Provider Architecture (ADR-015)
+
+**Unified Provider System** for embeddings and text generation:
+
+**Location:** `nextcloud_mcp_server/providers/`
+- `base.py` - `Provider` ABC with optional capabilities
+- `registry.py` - Auto-detection and factory pattern
+- `ollama.py` - Ollama provider (embeddings + generation)
+- `anthropic.py` - Anthropic provider (generation only)
+- `bedrock.py` - Amazon Bedrock provider (embeddings + generation)
+- `simple.py` - Simple in-memory provider (embeddings only, fallback)
+
+**Usage:**
+```python
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Auto-detects from environment
+
+# Check capabilities
+if provider.supports_embeddings:
+    embeddings = await provider.embed_batch(texts)
+
+if provider.supports_generation:
+    text = await provider.generate("prompt", max_tokens=500)
+```
+
+**Environment Variables:**
+
+Bedrock:
+- `AWS_REGION` - AWS region (e.g., "us-east-1")
+- `BEDROCK_EMBEDDING_MODEL` - Embedding model ID (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL` - Generation model ID (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` - Optional, uses AWS credential chain
+
+Ollama:
+- `OLLAMA_BASE_URL` - API URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL` - Embedding model (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL` - Generation model (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL` - SSL verification (default: "true")
+
+Simple (fallback, no config needed):
+- `SIMPLE_EMBEDDING_DIMENSION` - Dimension (default: 384)
+
+**Auto-Detection Priority:** Bedrock → Ollama → Simple
+
+**Backward Compatibility:**
+- Old code using `nextcloud_mcp_server.embedding.get_embedding_service()` still works
+- `EmbeddingService` now wraps `get_provider()` internally
+
+**For Details:** See `docs/ADR-015-unified-provider-architecture.md`
+
 ## Development Commands (Quick Reference)

 ### Testing
@@ -1,17 +1,24 @@
-FROM ghcr.io/astral-sh/uv:0.9.9-python3.11-alpine@sha256:0faa7934fac1db7f5056f159c1224d144bab864fd2677a4066d25a686ae32edd
+FROM docker.io/library/python:3.12-slim-trixie@sha256:b43ff04d5df04ad5cabb80890b7ef74e8410e3395b19af970dcd52d7a4bff921
+
+COPY --from=ghcr.io/astral-sh/uv:0.9.11@sha256:5aa820129de0a600924f166aec9cb51613b15b68f1dcd2a02f31a500d2ede568 /uv /uvx /bin/

 # Install dependencies
 # 1. git (required for caldav dependency from git)
 # 2. sqlite for development with token db
-RUN apk add --no-cache git sqlite
+RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
+    git \
+    tesseract-ocr \
+    sqlite3 && apt clean

 WORKDIR /app

 COPY . .

-RUN uv sync --locked --no-dev --no-editable
+RUN uv sync --locked --no-dev --no-editable --no-cache

 ENV PYTHONUNBUFFERED=1
 ENV VIRTUAL_ENV=/app/.venv
+# Put the virtualenv's executables first on PATH (must match VIRTUAL_ENV above).
+ENV PATH=/app/.venv/bin:$PATH
+ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

 ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
@@ -0,0 +1,44 @@
+# Dockerfile for Smithery stateless deployment
+# ADR-016: Stateless mode for multi-user public Nextcloud instances
+#
+# This image excludes:
+# - Vector database dependencies (qdrant-client)
+# - Background sync workers
+# - Admin UI routes (/app)
+# - Semantic search tools
+#
+# Features included:
+# - Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
+# - Per-session app password authentication
+# - Multi-user support via Smithery session config
+
+FROM docker.io/library/python:3.12-slim-trixie@sha256:b43ff04d5df04ad5cabb80890b7ef74e8410e3395b19af970dcd52d7a4bff921
+
+WORKDIR /app
+
+# Install uv for fast dependency management
+COPY --from=ghcr.io/astral-sh/uv:0.9.11@sha256:5aa820129de0a600924f166aec9cb51613b15b68f1dcd2a02f31a500d2ede568 /uv /uvx /bin/
+
+# Install dependencies
+# 1. git (required for caldav dependency from git)
+# The package index is removed in the same layer to keep the image small.
+RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
+    git && apt clean && rm -rf /var/lib/apt/lists/*
+
+# Copy project files
+COPY . .
+
+RUN uv sync --locked --no-dev --no-editable --no-cache
+
+# Set Smithery mode environment variables
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081 by default
+EXPOSE 8081
+
+# Health check endpoint
+# Use the virtualenv interpreter directly: `uv run` would first try to sync the
+# project environment (dev dependencies included) before executing the probe.
+# ${PORT:-8081} is expanded by the shell at probe time, defaulting to 8081.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD /app/.venv/bin/python -c "import httpx; httpx.get('http://localhost:${PORT:-8081}/health/live').raise_for_status()"
+
+CMD ["/app/.venv/bin/smithery-main"]
@@ -1,5 +1,11 @@
+<p align="center">
+  <img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128">
+</p>
+
 # Nextcloud MCP Server

+[![smithery badge](https://smithery.ai/badge/@cbcoutinho/nextcloud-mcp-server)](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
 [![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)

 **A production-ready MCP server that connects AI assistants to your Nextcloud instance.**
@@ -13,7 +19,20 @@ This is a **dedicated standalone MCP server** designed for external MCP clients

 ## Quick Start

-Get up and running in 60 seconds using Docker:
+The fastest way to get started is via [Smithery](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server) - no Docker or self-hosting required:
+
+1. Visit the [Smithery marketplace page](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
+2. Click "Deploy" and configure:
+   - **Nextcloud URL**: Your Nextcloud instance (e.g., `https://cloud.example.com`)
+   - **Username**: Your Nextcloud username
+   - **App Password**: Generate one in Nextcloud → Settings → Security → Devices & sessions
+
+> [!NOTE]
+> Smithery runs in stateless mode without semantic search. For full features, use [Docker](#docker-self-hosted) or see [ADR-016](docs/ADR-016-smithery-stateless-deployment.md).
+
+## Docker (Self-Hosted)
+
+For full features including semantic search, run with Docker:

 ```bash
 # 1. Create a minimal configuration
@@ -29,10 +48,15 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \

 # 3. Test the connection
 curl http://127.0.0.1:8000/health/ready
+
+# 4. Connect to the endpoint
+http://127.0.0.1:8000/sse
+
+# Or with --transport streamable-http
+http://127.0.0.1:8000/mcp
 ```

 **Next Steps:**
- Create an app password in Nextcloud: Settings → Security → Devices & sessions
 - Connect your MCP client (Claude Desktop, IDEs, `mcp dev`, etc.)
 - See [docs/installation.md](docs/installation.md) for other deployment options (local, Kubernetes)

@@ -123,6 +147,7 @@ This enables natural language queries and helps discover related content across
 - **[App Documentation](docs/)** - Notes, Calendar, Contacts, WebDAV, Deck, Cookbook, Tables
 - **[Document Processing](docs/configuration.md#document-processing)** - OCR and text extraction setup
 - **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes only, opt-in)
+- **[Vector Sync UI Guide](docs/user-guide/vector-sync-ui.md)** - Browser interface for semantic search visualization and testing

 ### Advanced Topics
 - **[OAuth Architecture](docs/oauth-architecture.md)** - How OAuth works (experimental)
@@ -199,3 +224,4 @@ This project is licensed under the AGPL-3.0 License. See [LICENSE](./LICENSE) fo
 - [Model Context Protocol](https://github.com/modelcontextprotocol)
 - [MCP Python SDK](https://github.com/modelcontextprotocol/python-sdk)
 - [Nextcloud](https://nextcloud.com/)
@@ -2,4 +2,30 @@

 set -euox pipefail

-php /var/www/html/occ app:enable notes
+echo "Installing and configuring notes app for testing..."
+
+# Check if development notes app is mounted at /opt/apps/notes
+if [ -d /opt/apps/notes ]; then
+    echo "Development notes app found at /opt/apps/notes"
+
+    # Remove any existing notes app in apps (from app store or old symlink)
+    if [ -e /var/www/html/custom_apps/notes ]; then
+        echo "Removing existing notes in apps..."
+        rm -rf /var/www/html/custom_apps/notes
+    fi
+
+    # Create symlink from apps to the mounted development version
+    # Per Nextcloud docs: apps outside server root need symlinks in server root
+    echo "Creating symlink: custom_apps/notes -> /opt/apps/notes"
+    ln -sf /opt/apps/notes /var/www/html/custom_apps/notes
+
+    echo "Enabling notes app from /opt/apps (development mode via symlink)"
+    php /var/www/html/occ app:enable notes
+elif [ -d /var/www/html/custom_apps/notes ]; then
+    echo "notes app directory found in apps (already installed)"
+    php /var/www/html/occ app:enable notes
+else
+    echo "notes app not found, installing from app store..."
+    php /var/www/html/occ app:install notes
+    php /var/www/html/occ app:enable notes
+fi
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512">
+  <rect width="512" height="512" rx="80" ry="80" fill="#0082C9"/>
+  <path d="M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 
15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z" fill="#fff"/>
+</svg>
@@ -1,9 +1,9 @@
 dependencies:
 - name: qdrant
  repository: https://qdrant.github.io/qdrant-helm
-  version: 1.15.5
+  version: 1.16.0
 - name: ollama
  repository: https://otwld.github.io/ollama-helm
-  version: 1.34.0
-digest: sha256:d51c97d05be2614b751c0dd7267ef7dc959eff5ebef859c5f895c5c554b7a874
-generated: "2025-11-09T17:08:02.86648061Z"
+  version: 1.35.0
+digest: sha256:da8db198b12ce0252df220fabb297cfe69186edb8e67952c52e05de778189b92
+generated: "2025-11-21T11:09:07.997781541Z"
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.33.1
-appVersion: "0.33.1"
+version: 0.48.2
+appVersion: "0.48.2"
 keywords:
  - nextcloud
  - mcp
@@ -27,10 +27,10 @@ annotations:
  grafana_dashboard_folder: "Nextcloud MCP"
 dependencies:
  - name: qdrant
-    version: "1.15.5"
+    version: "1.16.0"
    repository: https://qdrant.github.io/qdrant-helm
    condition: qdrant.networkMode.deploySubchart
  - name: ollama
-    version: "1.34.0"
+    version: "1.35.0"
    repository: https://otwld.github.io/ollama-helm
    condition: ollama.enabled
@@ -3,7 +3,7 @@ services:
  # https://hub.docker.com/_/mariadb
  db:
    # Note: Check the recommend version here: https://docs.nextcloud.com/server/latest/admin_manual/installation/system_requirements.html#server
-    image: docker.io/library/mariadb:lts@sha256:404ebf26ed7a56fbab05c29f6f1e70188e5eadb51bba8cee8d355775776deb08
+    image: docker.io/library/mariadb:lts@sha256:1cac8492bd78b1ec693238dc600be173397efd7b55eabc725abc281dc855b482
    restart: always
    command: --transaction-isolation=READ-COMMITTED
    volumes:
@@ -17,11 +17,11 @@ services:
  # Note: Redis is an external service. You can find more information about the configuration here:
  # https://hub.docker.com/_/redis
  redis:
-    image: docker.io/library/redis:alpine@sha256:28c9c4d7596949a24b183eaaab6455f8e5d55ecbf72d02ff5e2c17fe72671d31
+    image: docker.io/library/redis:alpine@sha256:6cbef353e480a8a6e7f10ec545f13d7d3fa85a212cdcc5ffaf5a1c818b9d3798
    restart: always

  app:
-    image: docker.io/library/nextcloud:32.0.1@sha256:5b043f7ea2f609d5ff5635f475c30d303bec17775a5c3f7fa435e3818e669120
+    image: docker.io/library/nextcloud:32.0.2@sha256:ac08482d73ffd85d94069ba291bbd5fb39a70ff21502030a2e3e2d89a7246a48
    restart: always
    ports:
      - 0.0.0.0:8080:80
@@ -69,23 +69,25 @@ services:

  mcp:
    build: .
-    command: ["--transport", "streamable-http"]
    restart: always
+    command: ["--transport", "streamable-http"]
    depends_on:
      app:
        condition: service_healthy
    ports:
      - 127.0.0.1:8000:8000
+      - 127.0.0.1:9090:9090
    volumes:
      - mcp-data:/app/data
    environment:
      - NEXTCLOUD_HOST=http://app:80
      - NEXTCLOUD_USERNAME=admin
      - NEXTCLOUD_PASSWORD=admin
+      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080

      # Vector sync configuration (ADR-007)
      - VECTOR_SYNC_ENABLED=true
-      - VECTOR_SYNC_SCAN_INTERVAL=10
+      - VECTOR_SYNC_SCAN_INTERVAL=60
      - VECTOR_SYNC_PROCESSOR_WORKERS=1

      #- LOG_FORMAT=json
@@ -156,7 +158,7 @@ services:
      - oauth-tokens:/app/data

  keycloak:
-    image: quay.io/keycloak/keycloak:26.4.4@sha256:c6459d5fae1b759f5d667ebdc6237ab3121379c3494e213898569014ede1846d
+    image: quay.io/keycloak/keycloak:26.4.5@sha256:653852bfdea2be6e958b9e90a976eff1c6de34edd55f2f679bdc48ef16bc528e
    command:
      - "start-dev"
      - "--import-realm"
@@ -193,8 +195,8 @@ services:
      # Provider auto-detected from OIDC_DISCOVERY_URL issuer
      # Using internal Docker hostname for discovery to get consistent issuer
      - OIDC_DISCOVERY_URL=http://keycloak:8080/realms/nextcloud-mcp/.well-known/openid-configuration
-      - OIDC_CLIENT_ID=nextcloud-mcp-server
-      - OIDC_CLIENT_SECRET=mcp-secret-change-in-production
+      - NEXTCLOUD_OIDC_CLIENT_ID=nextcloud-mcp-server
+      - NEXTCLOUD_OIDC_CLIENT_SECRET=mcp-secret-change-in-production
      - OIDC_JWKS_URI=http://keycloak:8080/realms/nextcloud-mcp/protocol/openid-connect/certs

      # Nextcloud API endpoint (for accessing APIs with validated token)
@@ -222,8 +224,28 @@ services:
      - keycloak-tokens:/app/data
      - keycloak-oauth-storage:/app/.oauth

+  # Smithery stateless deployment mode (ADR-016)
+  # Test with: docker compose --profile smithery up smithery
+  # Then: curl http://localhost:8081/.well-known/mcp-config
+  smithery:
+    build:
+      context: .
+      dockerfile: Dockerfile.smithery
+    restart: always
+    depends_on:
+      app:
+        condition: service_healthy
+    ports:
+      - 127.0.0.1:8081:8081
+    environment:
+      - SMITHERY_DEPLOYMENT=true
+      - VECTOR_SYNC_ENABLED=false
+      - PORT=8081
+    profiles:
+      - smithery
+
  qdrant:
-    image: qdrant/qdrant:v1.15.5@sha256:0fb8897412abc81d1c0430a899b9a81eb8328aa634e7242d1bc804c1fe8fe863
+    image: qdrant/qdrant:v1.16.0@sha256:1005201498cf927d835383d0f918b17d8c9da7db58550f169f694455e42d78f4
    restart: always
    ports:
      - 127.0.0.1:6333:6333  # REST API
@@ -1,7 +1,8 @@
 # ADR-011: Improving Semantic Search Quality Through Better Chunking and Embeddings

-**Status**: Proposed
+**Status**: Partially Implemented (Chunking Complete, Embeddings Pending)
 **Date**: 2025-11-12
+**Implementation Date**: 2025-11-18 (Chunking)
 **Authors**: Development Team
 **Related**: ADR-003 (Vector Database Architecture), ADR-008 (MCP Sampling for RAG)

@@ -893,3 +894,50 @@ This ADR addresses the root causes of poor semantic search recall:
 - No new infrastructure or ongoing costs

 **Next Steps**: Approve ADR → Implement changes → Reindex → Validate → Production rollout
+
+## Implementation Status
+
+### Completed (2025-11-18)
+
+**✅ Semantic Markdown-Aware Chunking (Option C1 + C3 Hybrid)**
+
+Implementation details:
+- Replaced custom word-based chunking with `MarkdownTextSplitter` from LangChain
+- Optimized for Nextcloud Notes markdown content with special handling for:
+  - Headers (`#`, `##`, `###`, etc.)
+  - Code blocks (` ``` `)
+  - Lists (`-`, `*`, `1.`)
+  - Horizontal rules (`---`)
+  - Paragraphs and sentences
+- Maintained `ChunkWithPosition` interface for backward compatibility
+- Updated configuration defaults:
+  - `DOCUMENT_CHUNK_SIZE`: 512 words → 2048 characters
+  - `DOCUMENT_CHUNK_OVERLAP`: 50 words → 200 characters
+- Updated unit tests to verify position tracking and boundary preservation
+- All tests passing with markdown-aware character-based chunking
+
+**Files Modified**:
+- `nextcloud_mcp_server/vector/document_chunker.py` - LangChain integration
+- `nextcloud_mcp_server/config.py` - Character-based defaults
+- `tests/unit/test_document_chunker.py` - Updated test suite
+
+**Dependencies**:
+- `langchain-text-splitters>=1.0.0` (no new dependency added; already present in `pyproject.toml`)
+
+**Migration Required**:
+- ⚠️ Full reindex required to apply new chunking strategy
+- Existing documents in vector database use old word-based chunks
+- See "Migration Strategy" section above for reindexing process
+
+### Pending
+
+**⏳ Embedding Model Upgrade (Option E1)**
+
+Still to be implemented:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Implement dynamic dimension detection in `ollama_provider.py`
+- Create migration script for collection reindexing
+- Run benchmarking to validate improvement
+- Deploy to production with atomic collection swap
+
+**Estimated Timeline**: 1-2 weeks for implementation and validation
@@ -0,0 +1,619 @@
+# ADR-012: Unified Multi-Algorithm Search with Client-Configurable Weighting
+
+## Status
+Proposed
+
+## Context
+
+### Current State
+
+The Nextcloud MCP server currently provides semantic search via vector similarity (Qdrant), as designed in ADR-003 and implemented through ADR-007. However, users and MCP clients have limited control over search behavior:
+
+1. **Single algorithm only**: Only pure vector similarity search is available
+2. **No algorithm selection**: MCP clients cannot choose between semantic, keyword, or fuzzy approaches
+3. **No weighting control**: Clients cannot adjust the balance between different search methods
+4. **Disconnected implementations**: Viz pane uses different search algorithms than MCP tools
+5. **Limited flexibility**: No way to optimize search for different use cases (exact match vs. conceptual similarity)
+
+### User Needs
+
+Different search scenarios require different algorithms:
+
+- **Exact match queries**: "Find note titled 'Q1 Budget'" → keyword search preferred
+- **Conceptual queries**: "What are my goals for next quarter?" → semantic search preferred
+- **Typo-tolerant queries**: "Find note about kuberntes" → fuzzy search needed
+- **Balanced queries**: "Find documentation about API endpoints" → hybrid search optimal
+
+Additionally, users need a **testing interface** (viz pane) to:
+- Experiment with different search algorithms on their own documents
+- Visualize search results and algorithm behavior
+- Tune weights for optimal results
+- Understand which algorithm works best for their queries
+
+### Technical Requirements
+
+1. **Unified interface**: Single MCP tool supporting multiple algorithms
+2. **Client control**: MCP clients specify algorithm and weights via tool parameters
+3. **Backward compatibility**: Existing `nc_semantic_search()` behavior preserved
+4. **Shared implementation**: Viz pane and MCP tools use identical search algorithms
+5. **User accessibility**: Viz pane available to all logged-in users with vector sync enabled
+6. **Performance**: Minimal overhead for algorithm selection
+
+## Decision
+
+We will implement a **unified multi-algorithm search architecture** with the following components:
+
+### Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                         MCP Client / User Browser                            │
+│                                                                               │
+│  ┌──────────────────────────┐         ┌──────────────────────────────────┐  │
+│  │   MCP Tool Call          │         │   Viz Pane (Browser UI)          │  │
+│  │                          │         │                                  │  │
+│  │ nc_semantic_search(      │         │ - Algorithm selector dropdown    │  │
+│  │   query="kubernetes",    │         │ - Weight adjustment sliders      │  │
+│  │   algorithm="hybrid",    │         │ - Interactive 2D scatter plot    │  │
+│  │   semantic_weight=0.5,   │         │ - Side-by-side comparison        │  │
+│  │   keyword_weight=0.3,    │         │ - Real-time search testing       │  │
+│  │   fuzzy_weight=0.2       │         │                                  │  │
+│  │ )                        │         │                                  │  │
+│  └───────────┬──────────────┘         └────────────┬─────────────────────┘  │
+└──────────────┼─────────────────────────────────────┼────────────────────────┘
+               │                                      │
+               │ MCP Protocol                         │ HTTPS (htmx)
+               │                                      │
+┌──────────────▼──────────────────────────────────────▼────────────────────────┐
+│                        MCP Server (/app endpoint)                             │
+│                                                                               │
+│  ┌─────────────────────────────────────────────────────────────────────────┐ │
+│  │              Unified Search Interface (server/semantic.py)              │ │
+│  │                                                                         │ │
+│  │  @mcp.tool() nc_semantic_search(algorithm, weights...)                 │ │
+│  │  ├─ Validate parameters (weights sum ≤1.0)                             │ │
+│  │  ├─ Dispatch to algorithm selector                                     │ │
+│  │  └─ Return ranked SearchResponse                                       │ │
+│  └────────────────────────────┬────────────────────────────────────────────┘ │
+│                                │                                              │
+│  ┌────────────────────────────▼────────────────────────────────────────────┐ │
+│  │              Algorithm Dispatcher (search/algorithms.py)                │ │
+│  │                                                                         │ │
+│  │  if algorithm == "semantic":    → semantic.py                          │ │
+│  │  if algorithm == "keyword":     → keyword.py                           │ │
+│  │  if algorithm == "fuzzy":       → fuzzy.py                             │ │
+│  │  if algorithm == "hybrid":      → hybrid.py (RRF fusion)               │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+│                                                                               │
+│  ┌──────────────────┐  ┌──────────────────┐  ┌──────────────────┐           │
+│  │  semantic.py     │  │  keyword.py      │  │  fuzzy.py        │           │
+│  │                  │  │                  │  │                  │           │
+│  │ • Query Qdrant   │  │ • Token matching │  │ • Char overlap   │           │
+│  │ • Cosine dist    │  │ • Title weight   │  │ • 70% threshold  │           │
+│  │ • Score ≥0.7     │  │ • ADR-001 logic  │  │ • Simple impl    │           │
+│  └────────┬─────────┘  └────────┬─────────┘  └────────┬─────────┘           │
+│           │                     │                      │                     │
+│           └─────────────────────┼──────────────────────┘                     │
+│                                 │                                            │
+│  ┌──────────────────────────────▼──────────────────────────────────────────┐ │
+│  │                    hybrid.py (Reciprocal Rank Fusion)                   │ │
+│  │                                                                         │ │
+│  │  1. Run algorithms in parallel (semantic, keyword, fuzzy)              │ │
+│  │  2. Collect ranked results from each                                   │ │
+│  │  3. Apply RRF formula: score = weight / (k + rank)                     │ │
+│  │  4. Combine scores across algorithms                                   │ │
+│  │  5. Re-rank by combined score                                          │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+└───────────────────────────────────┬───────────────────────────────────────────┘
+                                    │
+                    ┌───────────────┴───────────────┐
+                    │                               │
+         ┌──────────▼──────────┐         ┌─────────▼────────────┐
+         │ Qdrant Vector DB    │         │ Nextcloud APIs       │
+         │                     │         │                      │
+         │ • Vector search     │         │ • Access verification│
+         │ • user_id filter    │         │ • Full metadata fetch│
+         │ • Score threshold   │         │ • Permission checks  │
+         │ • 768-dim embeddings│         │                      │
+         └─────────────────────┘         └──────────────────────┘
+```
+
+### Data Flow
+
+#### MCP Tool Request
+```
+1. Client calls nc_semantic_search(query, algorithm="hybrid", weights...)
+2. Server validates parameters (weights sum ≤1.0)
+3. Dispatcher routes to hybrid.py
+4. Hybrid search runs semantic, keyword, fuzzy in parallel
+5. RRF combines results with weighted scores
+6. Access verification via Nextcloud API
+7. Return ranked SearchResponse to client
+```
+
+#### Viz Pane Request (Server-Side Processing)
+```
+1. User navigates to /app (Vector Visualization tab)
+2. Browser loads vector-viz fragment via htmx
+3. User enters query and adjusts algorithm/weights
+4. htmx sends request to /app/vector-viz endpoint
+5. Server executes search via search/algorithms.py:
+   - Filters by user_id (multi-tenant security)
+   - Applies selected algorithm (semantic/keyword/fuzzy/hybrid)
+   - Filters by document type (notes/files/calendar/contacts)
+   - Retrieves matching results + metadata
+6. Server performs PCA reduction (768-dim → 2D):
+   - Converts matching results to 2D coordinates
+   - Only sends coordinates + metadata (not full vectors)
+   - Dramatically reduces bandwidth (e.g., 768 floats → 2 floats per doc)
+7. Server returns JSON: {results: [...], coordinates_2d: [...], stats: {...}}
+8. Browser receives lightweight response
+9. Plotly.js renders interactive scatter plot
+10. Matching results highlighted (blue), non-matches grayed (40% opacity)
+```
+
+**Performance Benefits of Server-Side Processing**:
+- **Bandwidth reduction**: ~384x less data (2 floats vs 768 floats per document)
+- **Client efficiency**: Browser only handles visualization, not computation
+- **Scalability**: Can visualize 10,000+ documents without client-side lag
+- **Security**: Raw vectors never leave server
+- **Consistency**: Same search logic as MCP tool (no drift)
+
+### 1. Core Search Algorithms
+
+Four search algorithms will be available:
+
+#### a) Semantic Search (Vector Similarity)
+- **Method**: Cosine distance in 768-dimensional embedding space
+- **Implementation**: Qdrant `query_points` with user_id filtering
+- **Use case**: Conceptual queries, finding related content
+- **Current status**: Implemented in `nextcloud_mcp_server/server/semantic.py`
+
+#### b) Keyword Search (Token-Based)
+- **Method**: Token matching with weighted scoring (from ADR-001)
+- **Implementation**: Title matches weighted 3x higher than content
+- **Use case**: Exact phrase matching, known titles
+- **Current status**: Designed in ADR-001, not implemented
+
+#### c) Fuzzy Search (Character Overlap)
+- **Method**: Simple character-based similarity (70% threshold)
+- **Implementation**: Character set comparison (current viz pane approach)
+- **Use case**: Typo tolerance, approximate matching
+- **Current status**: Implemented in viz pane only
+
+#### d) Hybrid Search (Multi-Algorithm Fusion)
+- **Method**: Reciprocal Rank Fusion (RRF) from ADR-003
+- **Implementation**: Parallel execution + score combination
+- **Use case**: Balanced queries, general-purpose search
+- **Current status**: Designed in ADR-003, not implemented
+
+### 2. Unified MCP Tool Interface
+
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(
+    query: str,
+    ctx: Context,
+    limit: int = 10,
+    score_threshold: float = 0.7,
+    algorithm: Literal["semantic", "keyword", "fuzzy", "hybrid"] = "hybrid",
+    semantic_weight: float = 0.5,
+    keyword_weight: float = 0.3,
+    fuzzy_weight: float = 0.2,
+) -> SearchResponse:
+    """
+    Search Nextcloud content using configurable algorithms.
+
+    Args:
+        query: Natural language search query
+        ctx: MCP context for authentication
+        limit: Maximum results to return
+        score_threshold: Minimum similarity score (semantic/hybrid only)
+        algorithm: Search algorithm to use
+        semantic_weight: Weight for semantic results (hybrid only, default: 0.5)
+        keyword_weight: Weight for keyword results (hybrid only, default: 0.3)
+        fuzzy_weight: Weight for fuzzy results (hybrid only, default: 0.2)
+
+    Returns:
+        Ranked search results with scores and excerpts
+    """
+```
+
+**Key decisions**:
+- **Single tool name**: Keep `nc_semantic_search` for backward compatibility
+- **Algorithm parameter**: Explicit selection via enum
+- **Weight parameters**: Client-configurable, only apply to hybrid mode
+- **Validation**: Weights must sum to ≤1.0, enforced server-side
+- **Defaults**: Hybrid mode with balanced weights (semantic 50%, keyword 30%, fuzzy 20%)
+
+### 3. Shared Algorithm Implementation
+
+Extract search algorithms into reusable module:
+
+```
+nextcloud_mcp_server/
+├── search/
+│   ├── __init__.py
+│   ├── algorithms.py          # Core search implementations
+│   ├── semantic.py             # Vector similarity search
+│   ├── keyword.py              # Token-based search (ADR-001)
+│   ├── fuzzy.py                # Character overlap search
+│   └── hybrid.py               # RRF fusion (ADR-003)
+└── server/
+    └── semantic.py             # MCP tool wrapper
+```
+
+**Benefits**:
+- Viz pane and MCP tools share identical implementations
+- Testable in isolation
+- Easy to add new algorithms (e.g., BM25, neural reranking)
+- Clear separation of concerns
+
+### 4. Viz Pane Integration
+
+Update viz pane (`nextcloud_mcp_server/auth/userinfo_routes.py`) to:
+
+1. **Use shared algorithms**: Import from `search/algorithms.py`
+2. **Server-side filtering**: All search and filtering operations happen server-side
+   - Query execution via shared search backend
+   - Document type filtering (notes, files, calendar, contacts)
+   - User ID filtering for multi-tenant security
+   - Only matching results + metadata sent to client
+   - Reduces bandwidth and improves performance
+3. **PCA reduction**: Server performs dimensionality reduction (768-dim → 2D)
+   - Only 2D coordinates sent to browser for visualization
+   - Dramatically reduces data transfer vs sending full vectors
+   - Enables visualization of large document collections
+4. **User accessibility**: Available to all users with vector sync enabled
+5. **Security**: Filter results by `user_id` (only show user's own documents)
+6. **Interactive testing**: Allow users to:
+   - Select algorithm type
+   - Adjust weights (hybrid mode)
+   - Compare results across algorithms
+   - Visualize result distribution in 2D space
+
+#### Viz Pane UI Components
+
+```
+┌────────────────────────────────────────────────────────────────────────┐
+│ Vector Visualization                                          [Status] │
+├────────────────────────────────────────────────────────────────────────┤
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Configuration                                             │  │
+│ │                                                                  │  │
+│ │ Query: [_______________________________________________] [Search]│  │
+│ │                                                                  │  │
+│ │ Algorithm: [Hybrid ▼]  [Semantic] [Keyword] [Fuzzy]             │  │
+│ │                                                                  │  │
+│ │ Weights (Hybrid Mode):                                           │  │
+│ │   Semantic: [========50========] 0.5                             │  │
+│ │   Keyword:  [======30======    ] 0.3                             │  │
+│ │   Fuzzy:    [====20====        ] 0.2                             │  │
+│ │                                                                  │  │
+│ │ Document Types: ☑ Notes  ☑ Files  ☑ Calendar  ☑ Contacts        │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Vector Space Visualization (PCA 2D Projection)                   │  │
+│ │                                                                  │  │
+│ │        ▲                                                         │  │
+│ │    PC2 │     ●  ● ●      🔵 Matching results (full opacity)     │  │
+│ │        │  ●     ●  ●     ⚪ Non-matching results (40% opacity)   │  │
+│ │        │    🔵  ● ●                                              │  │
+│ │        │  ●  🔵  ●       Hover: Show document title + excerpt    │  │
+│ │        │  ● ●  🔵 ●      Click: Open document in Nextcloud       │  │
+│ │    ────┼──●─🔵──●─●────► PC1                                     │  │
+│ │        │   ● ●  ●                                                │  │
+│ │        │    🔵 ●   ●     Explained Variance:                     │  │
+│ │        │  ●    ●  ●      PC1: 23.4% | PC2: 18.7%                 │  │
+│ │        │     ● ●                                                 │  │
+│ │                                                                  │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Results (12 matching documents)                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Kubernetes Setup Guide                        Score: 0.87     │  │
+│ │    "...configure kubectl to connect to cluster..."              │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Container Orchestration Notes                 Score: 0.82     │  │
+│ │    "...deployment strategies for kubernetes..."                 │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 K8s Troubleshooting                           Score: 0.79     │  │
+│ │    "...common kuberntes errors and solutions..."                │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ [Show More Results...]                                           │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Algorithm Performance Comparison                                 │  │
+│ │                                                                  │  │
+│ │ Algorithm    │ Results │ Avg Score │ Time (ms) │ Precision     │  │
+│ │ ─────────────┼─────────┼───────────┼───────────┼───────────     │  │
+│ │ Semantic     │   45    │   0.78    │   145ms   │  ████░ 0.82   │  │
+│ │ Keyword      │   23    │   0.91    │    42ms   │  ███░░ 0.67   │  │
+│ │ Fuzzy        │   67    │   0.72    │    89ms   │  ██░░░ 0.45   │  │
+│ │ Hybrid (RRF) │   52    │   0.84    │   198ms   │  █████ 0.89   │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+```
+
+**Key UI Features**:
+
+1. **Search Input**: Real-time query testing with instant visualization
+2. **Algorithm Selector**: Dropdown + quick-select buttons
+3. **Weight Sliders**: Visual adjustment with live preview (hybrid mode only)
+4. **Document Type Filters**: Checkboxes for notes, files, calendar, contacts
+5. **2D Scatter Plot**: Interactive Plotly.js visualization
+   - Blue dots = matching documents (full opacity)
+   - Gray dots = non-matching documents (40% opacity)
+   - Hover = show title + excerpt tooltip
+   - Click = open document in Nextcloud
+   - Zoom/pan controls for exploration
+6. **Results Panel**: Ranked list with scores and excerpts
+7. **Performance Table**: Compare algorithm speed and accuracy
+8. **Explained Variance**: Show how much information PCA preserves
+
+**Technology Stack**:
+- **Frontend**: htmx for dynamic loading, Alpine.js for reactivity
+- **Visualization**: Plotly.js for interactive scatter plots
+- **Styling**: Tailwind CSS (consistent with existing /app UI)
+- **Backend**: Shared `search/algorithms.py` implementation
+
+### 5. Reciprocal Rank Fusion (RRF) for Hybrid Search
+
+Following ADR-003's design:
+
+```python
+def reciprocal_rank_fusion(
+    results: dict[str, list[SearchResult]],
+    weights: dict[str, float],
+    k: int = 60
+) -> list[SearchResult]:
+    """
+    Combine multiple ranked result lists using RRF.
+
+    Args:
+        results: Dict of algorithm_name -> ranked results
+        weights: Dict of algorithm_name -> weight (0-1)
+        k: RRF constant (default: 60, standard value)
+
+    Returns:
+        Combined and re-ranked results
+    """
+    scores = defaultdict(float)
+
+    for algo_name, algo_results in results.items():
+        weight = weights.get(algo_name, 0.0)
+        for rank, result in enumerate(algo_results, start=1):
+            # Weighted RRF formula: weight / (k + rank)
+            rrf_score = weight / (k + rank)
+            scores[result.doc_id] += rrf_score
+
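+    # Sort (doc_id, combined_score) pairs by score, highest first.
+    # NOTE: a real implementation must map doc_ids back to SearchResult objects
+    # (and truncate to the requested limit) to satisfy the declared return type.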
+    return sorted(scores.items(), key=lambda x: x[1], reverse=True)
+```
+
+**RRF properties**:
+- **Rank-based**: Uses position, not raw scores (handles score scale differences)
+- **Proven effective**: Standard approach in information retrieval
+- **Configurable**: `k` parameter controls rank decay (default: 60)
+- **Weight support**: Allows algorithm-specific importance
+
+## Implementation Plan
+
+### Phase 1: Extract and Unify Algorithms (Week 1)
+
+1. Create `nextcloud_mcp_server/search/` module
+2. Implement `algorithms.py` with base interface
+3. Extract semantic search logic from `server/semantic.py`
+4. Implement keyword search from ADR-001 design
+5. Extract fuzzy search from viz pane
+6. Implement RRF hybrid search from ADR-003
+7. Add comprehensive unit tests for each algorithm
+
+### Phase 2: Update MCP Tool (Week 1-2)
+
+1. Add `algorithm` parameter to `nc_semantic_search()`
+2. Add weight parameters (`semantic_weight`, etc.)
+3. Implement algorithm dispatcher
+4. Add parameter validation (weights sum ≤1.0)
+5. Update response model to include algorithm metadata
+6. Maintain backward compatibility (default: hybrid)
+7. Add integration tests for all algorithm modes
+
+### Phase 3: Update Viz Pane (Week 2)
+
+**Critical: All processing must happen server-side**
+
+1. **Remove client-side search filtering**
+   - Delete JavaScript-based keyword/fuzzy matching
+   - Remove client-side document type filtering
+   - No search logic in browser
+2. **Implement server-side endpoint** (`/app/vector-viz`)
+   - Accept query, algorithm, weights, doc_type filters
+   - Execute search via `search/algorithms.py`
+   - Filter results by user_id (security)
+   - Perform PCA reduction (768-dim → 2D)
+   - Return JSON with 2D coordinates + metadata only
+3. **Update frontend**
+   - htmx form submission to `/app/vector-viz`
+   - Algorithm selector dropdown
+   - Weight adjustment sliders (htmx updates on change)
+   - Document type checkboxes
+   - Plotly.js visualization of server response
+4. **Performance optimization**
+   - Limit results to user's documents only
+   - Cache PCA transformation (invalidate on new vectors)
+   - Stream large result sets if needed
+   - Add loading indicators for server processing
+
+### Phase 4: Documentation and Testing (Week 2-3)
+
+1. Update MCP tool documentation
+2. Add algorithm selection guide
+3. Document weight tuning recommendations
+4. Add end-to-end tests (MCP + viz pane)
+5. Performance benchmarks for each algorithm
+6. Update CLAUDE.md with search patterns
+
+## Consequences
+
+### Positive
+
+1. **Flexibility**: MCP clients can optimize search for their use case
+2. **Unified implementation**: Single source of truth for search algorithms
+3. **User empowerment**: Viz pane enables query testing and tuning
+4. **Backward compatible**: Tool name and existing parameters are preserved; pure vector search remains available via `algorithm="semantic"`
+5. **Extensible**: Easy to add new algorithms (BM25, neural reranking)
+6. **Testable**: Each algorithm can be unit tested independently
+7. **Standards-based**: RRF is proven in production systems
+
+### Negative
+
+1. **Complexity**: More parameters for clients to understand
+2. **API surface**: Larger tool signature (8 parameters)
+3. **Performance**: Hybrid search requires multiple queries
+4. **Validation overhead**: Weight validation adds processing
+5. **Documentation burden**: Need to explain when to use each algorithm
+
+### Neutral
+
+1. **Weight defaults**: May need tuning based on user feedback
+2. **Algorithm performance**: Will vary by content type and query
+3. **Viz pane adoption**: Unknown if users will utilize testing interface
+
+## Alternatives Considered
+
+### Alternative 1: Separate Tools Per Algorithm
+
+```python
+@mcp.tool()
+async def nc_semantic_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure vector similarity search."""
+
+@mcp.tool()
+async def nc_keyword_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure keyword matching."""
+
+@mcp.tool()
+async def nc_hybrid_search(query: str, ctx: Context, weights: dict, ...) -> SearchResponse:
+    """Hybrid search with weights."""
+```
+
+**Rejected because**:
+- API proliferation (3+ tools instead of 1)
+- Harder to discover capabilities
+- Backward compatibility issues
+- DRY violation (repeated parameters)
+
+### Alternative 2: Server-Wide Configuration Only
+
+```python
+# .env configuration
+SEARCH_ALGORITHM=hybrid
+SEMANTIC_WEIGHT=0.5
+KEYWORD_WEIGHT=0.3
+FUZZY_WEIGHT=0.2
+```
+
+**Rejected because**:
+- No per-query flexibility
+- MCP clients cannot optimize for different tasks
+- Requires server restart for changes
+- User's requirement: "expose a way for users to override the default weights"
+
+### Alternative 3: Production-Grade Fuzzy (Levenshtein/RapidFuzz)
+
+**Rejected because**:
+- Adds external dependency
+- Simple character overlap performs adequately
+- Can always upgrade later if needed
+- User's preference: "Keep simple character overlap"
+
+## Related ADRs
+
+- **ADR-001**: Enhanced Note Search (keyword algorithm design)
+- **ADR-003**: Vector Database and Semantic Search (hybrid search + RRF design)
+- **ADR-007**: Background Vector Sync (semantic search implementation)
+- **ADR-008**: MCP Sampling for RAG (uses semantic search results)
+- **ADR-009**: Semantic Search OAuth Scope (security model)
+- **ADR-011**: Improving Semantic Search Quality (mentions future "ADR-013" for hybrid search)
+
+**This ADR supersedes**:
+- ADR-011's placeholder for "ADR-013: Hybrid Search"
+
+**This ADR implements**:
+- ADR-003's hybrid search design (previously unimplemented)
+- ADR-001's keyword search design (previously unimplemented)
+
+## References
+
+- **Reciprocal Rank Fusion**: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). "Reciprocal rank fusion outperforms condorcet and individual rank learning methods." SIGIR '09.
+- **Vector Search**: Malkov, Y. A., & Yashunin, D. A. (2018). "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs." TPAMI.
+- **Hybrid Search Best Practices**: Qdrant documentation on hybrid search patterns
+- **MCP Protocol**: Model Context Protocol specification for tool design
+
+## Implementation Notes
+
+### Weight Validation
+
+```python
+def validate_weights(
+    semantic_weight: float,
+    keyword_weight: float,
+    fuzzy_weight: float
+) -> None:
+    """Validate hybrid search weights."""
+    if semantic_weight < 0 or keyword_weight < 0 or fuzzy_weight < 0:
+        raise ValueError("Weights must be non-negative")
+
+    total = semantic_weight + keyword_weight + fuzzy_weight
+    if total > 1.0:
+        raise ValueError(f"Weights sum to {total:.2f}, must be ≤1.0")
+
+    if total == 0.0:
+        raise ValueError("At least one weight must be > 0")
+```
+
+### Backward Compatibility
+
+The default behavior (`algorithm="hybrid"` with balanced weights) provides better results than current pure semantic search, while maintaining the same tool name and signature structure. Existing clients will automatically benefit from hybrid search without code changes.
+
+### Performance Considerations
+
+- **Semantic search**: ~50-200ms (vector DB query)
+- **Keyword search**: ~10-50ms (in-memory token matching)
+- **Fuzzy search**: ~20-100ms (character comparison)
+- **Hybrid search**: ~100-300ms (parallel execution + fusion)
+
+Parallel execution of algorithms minimizes hybrid search latency.
+
+### Security Model
+
+All algorithms respect the same security boundaries:
+1. **User filtering**: Qdrant queries filter by `user_id`
+2. **Access verification**: Results verified via Nextcloud API
+3. **OAuth scope**: `semantic:read` required for all algorithms
+4. **Viz pane**: Shows only current user's documents
+
+## Success Metrics
+
+1. **Adoption**: % of MCP clients using algorithm parameter
+2. **Performance**: Search latency percentiles (p50, p95, p99)
+3. **Quality**: User satisfaction with result relevance
+4. **Viz pane usage**: % of users accessing testing interface
+5. **Weight distribution**: Most common weight configurations
+
+## Future Enhancements
+
+1. **Additional algorithms**: BM25, TF-IDF, neural reranking
+2. **Auto-tuning**: Learn optimal weights per user
+3. **Query analysis**: Automatic algorithm selection based on query
+4. **Cross-app search**: Extend beyond notes to calendar, files, etc.
+5. **Feedback loop**: Use click-through rate to improve weights
@@ -0,0 +1,254 @@
+## ADR-013: RAG Evaluation Testing Framework
+
+**Status:** Proposed
+
+**Date:** 2025-11-15
+
+### Context
+
+The `nc_semantic_search_answer` tool implements a Retrieval-Augmented Generation (RAG) system where:
+1. **Retrieval**: Vector sync pipeline indexes Nextcloud documents (notes, calendar, contacts, etc.) into a vector database
+2. **Generation**: MCP client's LLM synthesizes answers from retrieved documents via MCP sampling (ADR-008)
+
+We need a testing framework to evaluate RAG system performance and identify whether failures occur in retrieval (wrong documents found) or generation (poor answer quality). This framework must use industry-standard evaluation methodologies while remaining practical to implement and maintain.
+
+To establish a baseline, we will use the **BeIR/nfcorpus** dataset (medical/biomedical corpus) with 3,633 documents and established query/relevance-judgment pairs.
+
+- Homepage: https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/
+- Download: https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nfcorpus.zip
+
+### Decision
+
+We will implement a **two-part evaluation framework** that independently tests retrieval and generation quality using pytest fixtures.
+
+#### In Scope
+
+**1. Retrieval Evaluation**
+Tests the vector sync/embedding pipeline's ability to find relevant documents.
+
+- **Metric: Context Recall** (Did we retrieve documents containing the answer?)
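+  - **Evaluation method**: Heuristic - Check if ground-truth document IDs appear in top-k retrieval results. When a query has more ground-truth documents than `k`, recall must be normalized by `min(k, number of expected documents)`; otherwise a fixed recall threshold is unreachable (e.g., at most 10 of 21 expected documents can appear in a top-10 result).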
+  - **Test**: Query → Semantic search → Assert expected doc IDs present
+
+**2. Generation Evaluation**
+Tests the MCP client LLM's ability to synthesize correct answers from retrieved context.
+
+- **Metric: Answer Correctness** (Is the generated answer factually correct?)
+  - **Evaluation method**: LLM-as-judge - Compare RAG answer against ground-truth answer
+  - **Test**: Query → `nc_semantic_search_answer` → LLM evaluates answer vs. ground truth (binary true/false)
+
+#### Out of Scope (Initial Implementation)
+
+- **Context Relevance/Precision**: Measuring irrelevant documents in retrieval results
+- **Faithfulness/Groundedness**: Detecting hallucinations not supported by retrieved context
+- **Answer Relevance**: Whether answer addresses the specific question asked
+- **Out-of-Scope Handling**: Testing "I don't know" responses when answer isn't in context
+- **Continuous benchmarking**: Automated tracking of metric trends over time
+- **Custom domain datasets**: Production-specific test data (medical corpus used initially)
+
+These remain valuable for future iterations but add complexity beyond our initial goals.
+
+#### Implementation
+
+**Test Structure**
+
+Location: `tests/rag_evaluation/`
+- `test_retrieval_quality.py` - Retrieval evaluation tests
+- `test_generation_quality.py` - Generation evaluation tests
+- `conftest.py` - Fixtures for test data, MCP clients, and evaluation LLMs
+
+**Required Pytest Fixtures**
+
+1. **`nfcorpus_test_data`** (session-scoped)
+   - Downloads/caches BeIR nfcorpus dataset at runtime
+   - Loads 5 pre-selected test queries with:
+     - Query text
+     - Pre-generated ground-truth answer (from `tests/rag_evaluation/fixtures/ground_truth.json`)
+     - Expected document IDs (from qrels with score=2)
+   - Uploads all corpus documents as notes in test Nextcloud instance
+   - Triggers vector sync to index documents
+   - Waits for indexing completion
+   - Returns test case data structure
+
+2. **`mcp_sampling_client`** (session-scoped)
+   - Creates MCP client that supports sampling
+   - Configurable LLM provider (ollama or anthropic) via environment:
+     - `RAG_EVAL_PROVIDER=ollama` (default) or `anthropic`
+     - `RAG_EVAL_OLLAMA_BASE_URL=http://localhost:11434`
+     - `RAG_EVAL_OLLAMA_MODEL=llama3.1:8b`
+     - `RAG_EVAL_ANTHROPIC_API_KEY=sk-...`
+     - `RAG_EVAL_ANTHROPIC_MODEL=claude-3-5-sonnet-20241022`
+   - Returns configured MCP client fixture
+
+3. **`evaluation_llm`** (session-scoped)
+   - Separate LLM instance for evaluation (independent from MCP client)
+   - Same provider configuration as `mcp_sampling_client`
+   - Returns callable: `async def evaluate(prompt: str) -> str`
+
+**Test Implementation Examples**
+
+```python
+# tests/rag_evaluation/test_retrieval_quality.py
+async def test_retrieval_recall(nc_client, nfcorpus_test_data):
+    """Test that semantic search retrieves documents containing the answer."""
+    for test_case in nfcorpus_test_data:
+        # Perform semantic search (retrieval only, no generation)
+        results = await nc_client.notes.semantic_search(
+            query=test_case.query,
+            limit=10
+        )
+
+        retrieved_doc_ids = {r.document_id for r in results}
+        expected_doc_ids = set(test_case.expected_document_ids)
+
+        # Context Recall: Are expected documents in top-k results?
+        recall = len(expected_doc_ids & retrieved_doc_ids) / len(expected_doc_ids)
+        assert recall >= 0.8, f"Recall {recall} below threshold for query: {test_case.query}"
+
+
+# tests/rag_evaluation/test_generation_quality.py
+async def test_answer_correctness(mcp_sampling_client, evaluation_llm, nfcorpus_test_data):
+    """Test that RAG system generates factually correct answers."""
+    for test_case in nfcorpus_test_data:
+        # Execute full RAG pipeline (retrieval + generation)
+        result = await mcp_sampling_client.call_tool(
+            "nc_semantic_search_answer",
+            arguments={"query": test_case.query, "limit": 5}
+        )
+
+        rag_answer = result["generated_answer"]
+
+        # LLM-as-judge evaluation
+        evaluation_prompt = f"""Compare these two answers and respond with only TRUE or FALSE.
+
+Question: {test_case.query}
+
+Generated Answer: {rag_answer}
+
+Ground Truth Answer: {test_case.ground_truth}
+
+Are these answers semantically equivalent (do they convey the same factual information)?
+Respond with only: TRUE or FALSE"""
+
+        evaluation_result = await evaluation_llm(evaluation_prompt)
+
+        assert evaluation_result.strip().upper() == "TRUE", \
+            f"Answer mismatch for query: {test_case.query}\nGot: {rag_answer}\nExpected: {test_case.ground_truth}"
+```
+
+**Dataset Integration**
+
+The BeIR nfcorpus dataset structure:
+- **corpus.jsonl**: 3,633 medical/biomedical documents (articles from PubMed)
+- **queries.jsonl**: 3,237 queries (questions)
+- **qrels/*.tsv**: Relevance judgments mapping query IDs to document IDs with scores (2=highly relevant, 1=somewhat relevant)
+
+**Important**: The dataset provides relevance judgments (which documents answer which queries) but does NOT include ground truth answers. We must generate synthetic ground truth offline.
+
+**Selected Test Queries** (5 diverse candidates):
+
+1. **PLAIN-2630**: "Alkylphenol Endocrine Disruptors and Allergies" (5 words, 21 highly relevant docs)
+2. **PLAIN-2660**: "How Long to Detox From Fish Before Pregnancy?" (8 words, 20 highly relevant docs)
+3. **PLAIN-2510**: "Coffee and Artery Function" (4 words, 16 highly relevant docs)
+4. **PLAIN-2430**: "Preventing Brain Loss with B Vitamins?" (6 words, 15 highly relevant docs)
+5. **PLAIN-2690**: "Chronic Headaches and Pork Tapeworms" (5 words, 14 highly relevant docs)
+
+**Ground Truth Generation** (offline, pre-test):
+
+Ground truth answers will be generated offline using a script that:
+1. Loads nfcorpus dataset
+2. For each selected query, extracts top 3-5 highly relevant documents
+3. Uses an LLM (ollama/anthropic) to synthesize a reference answer
+4. Stores ground truth in `tests/rag_evaluation/fixtures/ground_truth.json`
+
+```python
+# tools/generate_rag_ground_truth.py
+async def generate_ground_truth(query: str, relevant_docs: List[dict], llm: LLMProvider) -> str:
+    """Generate synthetic ground truth answer from highly relevant documents."""
+    context = "\n\n".join([
+        f"Document {i+1}:\nTitle: {doc['title']}\n{doc['text']}"
+        for i, doc in enumerate(relevant_docs[:5])
+    ])
+
+    prompt = f"""Based on the following documents, provide a comprehensive answer to this question:
+
+Question: {query}
+
+{context}
+
+Provide a factual, well-structured answer that synthesizes information from the documents.
+Focus on accuracy and completeness."""
+
+    return await llm.generate(prompt, max_tokens=500)
+```
+
+**Dataset Loading at Test Runtime** (in `nfcorpus_test_data` fixture):
+
+1. Download nfcorpus dataset (cached in pytest temp directory)
+2. Load corpus, queries, and qrels (relevance judgments)
+3. Load pre-generated ground truth from `tests/rag_evaluation/fixtures/ground_truth.json`
+4. Upload all corpus documents as Nextcloud notes
+5. Trigger vector sync to index documents
+6. Wait for indexing completion
+7. Return test cases with query, ground truth, and expected doc IDs
+
+**LLM Provider Abstraction**
+
+```python
+# tests/rag_evaluation/llm_providers.py
+class LLMProvider(Protocol):
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str: ...
+
+class OllamaProvider:
+    def __init__(self, base_url: str, model: str):
+        self.base_url = base_url
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        # Use httpx to call Ollama API
+        ...
+
+class AnthropicProvider:
+    def __init__(self, api_key: str, model: str):
+        self.client = anthropic.AsyncAnthropic(api_key=api_key)
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        message = await self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return message.content[0].text
+```
+
+### Consequences
+
+**Positive:**
+
+* **Actionable debugging**: Separate retrieval/generation tests pinpoint failure location
+* **Industry-standard metrics**: Context Recall and Answer Correctness are recognized RAG evaluation metrics
+* **Simple initial implementation**: Binary LLM evaluation (true/false) is straightforward to implement and interpret
+* **Extensible framework**: Easy to add more metrics (faithfulness, relevance) later
+* **Standardized benchmark**: nfcorpus provides objective comparison against published RAG systems
+* **Hybrid evaluation**: Combines efficiency (heuristics for retrieval) with quality (LLM-as-judge for generation)
+* **Provider flexibility**: Supports both local (Ollama) and cloud (Anthropic) LLM evaluation
+
+**Negative:**
+
+* **Medical domain bias**: nfcorpus is medical/biomedical content, may not represent production use cases (personal notes, calendar events, etc.)
+* **Manual test execution**: Tests require external LLM access and are not integrated into CI pipeline
+* **Limited initial coverage**: Starting with only 5 queries provides limited statistical confidence
+* **Evaluation cost**: LLM-as-judge for generation evaluation incurs API costs (Anthropic) or requires local inference (Ollama)
+* **Single metric per component**: Initial scope tests only one metric per component, missing other important quality dimensions
+* **Synthetic ground truth**: Ground truth answers are LLM-generated, not human-validated, which may introduce evaluation bias
+* **Large corpus upload**: Uploading 3,633 documents at test runtime may be slow; caching strategy needed
+
+**Future Work:**
+
+* Expand to 50-100 queries for statistical significance
+* Add custom test dataset with production-representative documents (meeting notes, task lists, etc.)
+* Implement additional metrics (faithfulness, context relevance, answer relevance)
+* Create automated benchmarking dashboard to track metric trends
+* Test multi-hop reasoning (synthesis questions requiring multiple documents)
+* Evaluate out-of-scope handling ("I don't know" responses)
@@ -0,0 +1,241 @@
+# ADR-014: Replace Custom Keyword Search with BM25 Hybrid Search via Qdrant
+
+**Date:** 2025-11-16
+
+**Status:** Implemented
+
+---
+
+### 1. Context
+
+Our RAG application currently employs two separate retrieval mechanisms:
+1.  **Dense (Semantic) Search:** Using vector embeddings stored in our Qdrant database to find semantically similar context.
+2.  **Keyword Search:** A custom-built fuzzy/character-based search to match specific keywords, acronyms, and product codes that semantic search often misses.
+
+This dual-system approach has several drawbacks:
+* **Poor Relevance:** Our current keyword search is basic (e.g., `LIKE` queries or simple fuzzy matching). It is not as effective as modern full-text search algorithms like BM25.
+* **Clunky Fusion:** We lack a robust, principled method to combine the results from the two systems. This leads to disjointed logic in the application layer and suboptimal context being passed to the LLM.
+* **Architectural Complexity:** We must maintain two separate search pathways (one to Qdrant, one to the keyword search mechanism), increasing code complexity and maintenance overhead.
+
+Our vector database, **Qdrant**, natively supports **hybrid search** by combining dense vectors with BM25-based **sparse vectors** in a single collection.
+
+### 2. Decision
+
+We will **deprecate and remove** the existing custom keyword/fuzzy search functionality.
+
+We will **replace it by implementing native hybrid search within Qdrant**. This involves:
+1.  **Modifying the Qdrant Collection:** Updating our collection to support a named sparse vector index configured for BM25.
+2.  **Updating the Ingestion Pipeline:** For every document chunk, we will generate and upsert *both*:
+    * Its **dense vector** (from our existing embedding model).
+    * Its **sparse vector** (generated using a BM25-compatible model, e.g., `Qdrant/bm25` from `fastembed`).
+3.  **Refactoring Retrieval Logic:** All retrieval calls will be consolidated into a single Qdrant query using the `query_points` endpoint. This query will use the `prefetch` parameter to execute both dense and sparse searches, and Qdrant's built-in **Reciprocal Rank Fusion (RRF)** to automatically merge the results into a single, relevance-ranked list.
+4.  **Backfilling:** A one-time migration script will be created to generate and add sparse vectors for all existing documents in the Qdrant collection.
+
+---
+
+### 3. Considered Options
+
+#### Option 1: Native Qdrant Hybrid Search (Chosen)
+* Use Qdrant's built-in sparse vector and RRF capabilities.
+* **Pros:**
+    * **Consolidated Architecture:** Manages both dense and sparse indexes in one database.
+    * **No Data Sync Issues:** Updates are atomic. A single `upsert` updates both representations.
+    * **Built-in Fusion:** RRF is handled natively and efficiently by the database.
+    * **Superior Relevance:** Replaces our brittle custom search with the industry-standard BM25.
+* **Cons:**
+    * Requires a one-time data backfill which may be time-consuming.
+    * Adds a new step (sparse vector generation) to the ingestion pipeline.
+
+#### Option 2: External Full-Text Search (e.g., Elasticsearch)
+* Keep Qdrant for dense search and add a separate Elasticsearch/OpenSearch cluster for BM25.
+* **Pros:**
+    * Provides a very powerful, dedicated full-text search engine.
+* **Cons:**
+    * **High Complexity:** Introduces a new, stateful service to deploy, manage, and scale.
+    * **Data Sync Nightmare:** We would be responsible for ensuring that the document IDs and content in Qdrant and Elasticsearch are always perfectly synchronized. This is a major source of bugs.
+    * **Manual Fusion:** The application would have to query both systems and perform RRF manually.
+
+#### Option 3: Keep Current System
+* Make no changes.
+* **Pros:**
+    * No engineering effort required.
+* **Cons:**
+    * Fails to address the known relevance and architectural problems.
+    * Our RAG application's performance will remain suboptimal, especially for keyword-sensitive queries.
+
+---
+
+### 4. Rationale
+
+**Option 1 is the clear winner.** It directly solves our primary problem (poor keyword matching) by adopting the industry-standard BM25.
+
+Critically, it achieves this while **simplifying** our overall architecture, not complicating it. By leveraging features already present in our existing database (Qdrant), we avoid the massive operational and synchronization overhead of adding a second search system (Option 2).
+
+This decision consolidates our retrieval logic, eliminates the data consistency problem, and moves the complex fusion logic (RRF) from the application layer into the database, where it can be performed more efficiently.
+
+### 5. Consequences
+
+**New Work:**
+* **Ingestion:** The data ingestion pipeline must be updated to add the `fastembed` library (or similar), generate sparse vectors, and upsert them to the new named vector field in Qdrant.
+* **Retrieval:** The application's retrieval service must be refactored to use the `query_points` endpoint with `prefetch` and `fusion=models.Fusion.RRF`.
+* **Migration:** A one-time backfill script must be written and executed to add sparse vectors for all existing documents.
+* **Infrastructure:** The Qdrant collection schema must be updated (or re-created) to add the `sparse_vectors_config`.
+
+**Positive:**
+* **Improved Accuracy:** Retrieval will be significantly more accurate, handling both semantic and keyword queries robustly.
+* **Simplified Code:** The application's retrieval logic will be cleaner and simpler, with one endpoint instead of two.
+* **Reduced Maintenance:** We will remove the custom fuzzy-search code, which is brittle and difficult to maintain.
+
+**Negative:**
+* The data backfill process will require careful management to avoid downtime.
+* Ingestion time will slightly increase due to the extra step of sparse vector generation. This is considered a negligible trade-off for the gains in relevance.
+
+---
+
+### 6. Implementation Notes
+
+**Implementation completed on 2025-11-16**
+
+**Key Changes:**
+
+1. **Dependencies** (pyproject.toml:25):
+   - Added `fastembed>=0.4.2` for BM25 sparse vector embeddings
+   - Adjusted `pillow` version constraint to be compatible with fastembed
+
+2. **Qdrant Collection Schema** (nextcloud_mcp_server/vector/qdrant_client.py:113-128):
+   - Updated to named vectors: `{"dense": VectorParams(...), "sparse": SparseVectorParams(...)}`
+   - Added sparse vector configuration with BM25 index
+   - Maintains backward compatibility with existing collections (detects legacy schema)
+
+3. **BM25 Embedding Provider** (nextcloud_mcp_server/embedding/bm25_provider.py):
+   - Created `BM25SparseEmbeddingProvider` using FastEmbed's `Qdrant/bm25` model
+   - Implements `encode()` and `encode_batch()` methods
+   - Returns sparse vectors as `{indices: list[int], values: list[float]}` format
+
+4. **Document Indexing Pipeline** (nextcloud_mcp_server/vector/processor.py:229-255):
+   - Generates both dense (semantic) and sparse (BM25) embeddings for each document chunk
+   - Updates `PointStruct` to use named vectors: `vector={"dense": ..., "sparse": ...}`
+   - Maintains same chunking strategy (512 words, 50-word overlap)
+
+5. **BM25 Hybrid Search Algorithm** (nextcloud_mcp_server/search/bm25_hybrid.py):
+   - Implements `BM25HybridSearchAlgorithm` using Qdrant's native RRF fusion
+   - Uses `prefetch` parameter for parallel dense + sparse search
+   - Applies `fusion=models.Fusion.RRF` for automatic result merging
+   - Maintains same deduplication and filtering logic as semantic search
+
+6. **MCP Tool Updates** (nextcloud_mcp_server/server/semantic.py:39-68):
+   - Simplified `nc_semantic_search()` to use BM25 hybrid only
+   - Removed `algorithm`, `semantic_weight`, `keyword_weight`, `fuzzy_weight` parameters
+   - Updated default `score_threshold=0.0` for RRF scoring
+   - Returns `search_method="bm25_hybrid"` in responses
+
+7. **Legacy Algorithm Removal**:
+   - Deleted `nextcloud_mcp_server/search/keyword.py` (278 lines)
+   - Deleted `nextcloud_mcp_server/search/fuzzy.py` (220 lines)
+   - Deleted `nextcloud_mcp_server/search/hybrid.py` (238 lines - custom RRF)
+   - Updated `nextcloud_mcp_server/search/__init__.py` to export only BM25 hybrid
+
+**Migration Strategy:**
+- No migration required (vector sync feature is experimental)
+- New documents automatically indexed with both dense + sparse vectors
+- Collection re-creation on first startup with updated schema
+
+**Test Results:**
+- All unit tests passing (118 passed)
+- All integration tests passing (7 semantic search tests)
+- Code formatting verified with ruff
+
+**Benefits Realized:**
+- ✅ Consolidated architecture (single Qdrant database for both dense + sparse)
+- ✅ Native fusion algorithms (database-level, more efficient)
+- ✅ Industry-standard BM25 (replaces custom keyword search)
+- ✅ Simplified codebase (removed 736 lines of legacy code)
+- ✅ Better relevance (handles both semantic and keyword queries)
+- ✅ Configurable fusion methods (RRF and DBSF)
+
+---
+
+### 7. Fusion Algorithm Options
+
+**Update: 2025-11-16**
+
+The BM25 hybrid search now supports two fusion algorithms for combining dense (semantic) and sparse (BM25) search results:
+
+#### Reciprocal Rank Fusion (RRF)
+
+**Default fusion method.** RRF is a widely-used, well-established algorithm that combines rankings from multiple retrieval systems using the reciprocal rank formula:
+
+```
+RRF(doc) = Σ 1/(k + rank_i(doc))
+```
+
+where `k` is a constant (typically 60) and `rank_i(doc)` is the rank of the document in retrieval system `i`.
+
+**Characteristics:**
+- ✅ **General-purpose**: Works well across diverse query types and document collections
+- ✅ **Rank-based**: Focuses on relative rankings rather than absolute scores
+- ✅ **Established**: Well-tested, documented, and understood in IR literature
+- ✅ **Robust**: Less sensitive to score distribution differences between systems
+
+**When to use RRF:**
+- Default choice for most use cases
+- When you have mixed query types (semantic + keyword)
+- When retrieval systems have very different score ranges
+- When you want predictable, well-understood behavior
+
+#### Distribution-Based Score Fusion (DBSF)
+
+**Alternative fusion method.** DBSF normalizes scores from each retrieval system using distribution statistics before combining them:
+
+1. **Normalization**: For each query, calculates mean (μ) and standard deviation (σ) of scores
+2. **Outlier handling**: Uses μ ± 3σ as normalization bounds
+3. **Fusion**: Sums normalized scores across systems
+
+**Characteristics:**
+- ✅ **Score-aware**: Uses actual relevance scores, not just rankings
+- ✅ **Statistical**: Normalizes based on score distribution properties
+- ⚠️ **Experimental**: Newer algorithm, less battle-tested than RRF
+- ⚠️ **Sensitive**: May behave differently depending on score distributions
+
+**When to use DBSF:**
+- When retrieval systems have vastly different score ranges that RRF doesn't balance well
+- When you want to experiment with score-based (vs rank-based) fusion
+- When statistical normalization better matches your use case
+- For A/B testing against RRF to measure retrieval quality improvements
+
+#### Configuration
+
+Both fusion algorithms are exposed via the `fusion` parameter in MCP tools:
+
+```python
+# Use RRF (default)
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf"  # Can be omitted, RRF is default
+)
+
+# Use DBSF
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="dbsf"
+)
+```
+
+The `nc_semantic_search_answer` tool also supports the `fusion` parameter and passes it through to the underlying search.
+
+#### Future: Configurable Weights
+
+**Current limitation**: Neither RRF nor DBSF currently supports per-system weights (e.g., 0.8 for semantic, 0.2 for BM25). This is a Qdrant platform limitation tracked in [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067).
+
+When Qdrant adds weight support, the `fusion` parameter can be extended to accept weight configurations:
+
+```python
+# Hypothetical future API
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf",
+    fusion_weights={"dense": 0.7, "sparse": 0.3}  # Not yet implemented
+)
+```
+
+**Recommendation**: Start with RRF (default). If you encounter cases where keyword matches are under- or over-weighted, experiment with DBSF. Monitor [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067) for configurable weight support.
@@ -0,0 +1,380 @@
+# ADR-015: Unified Provider Architecture for Embeddings and Text Generation
+
+**Status:** Accepted
+**Date:** 2025-11-16
+**Deciders:** Development Team
+**Related:** ADR-003 (Vector Database), ADR-008 (MCP Sampling), ADR-013 (RAG Evaluation)
+
+## Context
+
+Prior to this refactoring, the codebase had two separate provider systems:
+
+1. **Embedding Providers** (`nextcloud_mcp_server/embedding/`)
+   - Used `EmbeddingProvider` ABC with methods: `embed()`, `embed_batch()`, `get_dimension()`
+   - Had auto-detection via `EmbeddingService._detect_provider()`
+   - Used for semantic search and vector indexing (production)
+
+2. **LLM Providers** (`tests/rag_evaluation/llm_providers.py`)
+   - Used `LLMProvider` Protocol with method: `generate()`
+   - Had separate factory function `create_llm_provider()`
+   - Used only for RAG evaluation tests (not production)
+
+This fragmentation created several problems:
+
+### Problems with Dual Provider Systems
+
+1. **Code Duplication**
+   - Ollama configuration appeared in both `embedding/service.py` and `tests/rag_evaluation/llm_providers.py`
+   - Similar provider detection logic in multiple places
+   - Separate singleton patterns for each system
+
+2. **Limited Extensibility**
+   - Hard-coded provider detection in `EmbeddingService._detect_provider()`
+   - No support for providers that offer both capabilities (like Bedrock)
+   - Adding new providers required modifying multiple files
+
+3. **Inconsistent Patterns**
+   - BM25 provider didn't follow `EmbeddingProvider` ABC
+   - Different method names across providers (`embed` vs `encode`)
+   - ABC vs Protocol for type checking
+
+4. **Difficult Scaling**
+   - Adding Amazon Bedrock (our third provider) would exacerbate all issues
+   - No clear path for future providers (OpenAI, Cohere, etc.)
+
+### Amazon Bedrock Requirements
+
+Bedrock naturally supports **both** embeddings and text generation:
+- **Embeddings**: `amazon.titan-embed-text-v1/v2`, `cohere.embed-*`
+- **Text Generation**: `anthropic.claude-*`, `meta.llama3-*`, `amazon.titan-text-*`
+- **Unified API**: Single `invoke_model()` method via bedrock-runtime
+
+This made it the perfect opportunity to establish a unified provider architecture.
+
+## Decision
+
+We refactored the provider infrastructure to use a **unified Provider ABC** with optional capabilities:
+
+### 1. Unified Provider Interface
+
+**New Structure:**
+```
+nextcloud_mcp_server/providers/
+├── __init__.py
+├── base.py              # Provider ABC with optional capabilities
+├── registry.py          # Auto-detection and factory
+├── ollama.py            # Supports both embedding + generation
+├── anthropic.py         # Generation only
+├── bedrock.py           # Supports both embedding + generation
+└── simple.py            # Embedding only (testing fallback)
+```
+
+**Base Class (`providers/base.py`):**
+```python
+class Provider(ABC):
+    @property
+    @abstractmethod
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        pass
+
+    @property
+    @abstractmethod
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        pass
+
+    @abstractmethod
+    async def embed(self, text: str) -> list[float]:
+        """Generate embedding (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Generate batch embeddings (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    def get_dimension(self) -> int:
+        """Get embedding dimension (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """Generate text (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def close(self) -> None:
+        """Close provider and release resources."""
+        pass
+```
+
+### 2. Provider Registry
+
+**Auto-Detection Priority** (`providers/registry.py`):
+```python
+class ProviderRegistry:
+    @staticmethod
+    def create_provider() -> Provider:
+        # 1. Bedrock (AWS_REGION or BEDROCK_*_MODEL)
+        # 2. Ollama (OLLAMA_BASE_URL)
+        # 3. Simple (fallback)
+```
+
+**Environment Variables:**
+
+**Bedrock:**
+- `AWS_REGION`: AWS region (e.g., "us-east-1")
+- `AWS_ACCESS_KEY_ID`: AWS access key (optional, uses credential chain)
+- `AWS_SECRET_ACCESS_KEY`: AWS secret key (optional)
+- `BEDROCK_EMBEDDING_MODEL`: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL`: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+
+**Ollama:**
+- `OLLAMA_BASE_URL`: Ollama API base URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL`: Model for embeddings (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL`: Model for text generation (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL`: Verify SSL certificates (default: "true")
+
+**Simple (fallback; no configuration required):**
+- `SIMPLE_EMBEDDING_DIMENSION`: Embedding dimension (default: 384)
+
+### 3. Backward Compatibility
+
+**Old Code Continues to Work:**
+```python
+# Old way (still works)
+from nextcloud_mcp_server.embedding import get_embedding_service
+
+service = get_embedding_service()  # Returns singleton Provider
+embeddings = await service.embed_batch(texts)
+```
+
+**New Way (recommended):**
+```python
+# New way (cleaner)
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Returns singleton Provider
+embeddings = await provider.embed_batch(texts)
+
+# Can also use generation if provider supports it
+if provider.supports_generation:
+    text = await provider.generate("prompt")
+```
+
+**Migration Path:**
+- `embedding/service.py` now wraps `providers.get_provider()` for compatibility
+- `tests/rag_evaluation/llm_providers.py` now uses unified providers
+- Old imports still work, marked as deprecated in docstrings
+
+### 4. Amazon Bedrock Implementation
+
+**Features:**
+- Supports both embeddings and text generation
+- Model-specific request/response handling for:
+  - Titan Embed (amazon.titan-embed-text-*)
+  - Cohere Embed (cohere.embed-*)
+  - Claude (anthropic.claude-*)
+  - Llama (meta.llama3-*)
+  - Titan Text (amazon.titan-text-*)
+  - Mistral (mistral.*)
+- Uses boto3 bedrock-runtime client
+- Graceful degradation if boto3 not installed
+- Async implementation matching existing patterns
+
+**Model-Specific Handling:**
+```python
+# Bedrock embedding request (Titan)
+{"inputText": text}
+
+# Bedrock generation request (Claude)
+{
+    "anthropic_version": "bedrock-2023-05-31",
+    "max_tokens": max_tokens,
+    "temperature": 0.7,
+    "messages": [{"role": "user", "content": prompt}]
+}
+```
+
+## Consequences
+
+### Positive
+
+1. **Sustainable Provider Additions**
+   - New providers only need to implement `Provider` ABC
+   - Auto-detection via environment variables
+   - No modifications to existing code required
+
+2. **Code Consolidation**
+   - Single provider interface instead of two
+   - Unified configuration pattern
+   - Eliminated duplication
+
+3. **Better Extensibility**
+   - Providers can support one or both capabilities
+   - Clear capability detection via properties
+   - Registry pattern simplifies auto-detection
+
+4. **Improved Testing**
+   - RAG evaluation can use any provider (Ollama, Anthropic, Bedrock)
+   - Comprehensive unit tests for all providers
+   - Mocked boto3 tests for Bedrock
+
+5. **Production-Ready Bedrock Support**
+   - Full embedding and generation support
+   - Multiple model families supported
+   - AWS credential chain integration
+
+### Neutral
+
+1. **Optional Boto3 Dependency**
+   - boto3 is a dev dependency only (not required for core functionality)
+   - Bedrock provider gracefully fails if boto3 not installed
+   - Users who want Bedrock must `pip install boto3`
+
+2. **Capability Properties**
+   - All providers must implement capability properties
+   - Methods raise `NotImplementedError` if capability not supported
+   - Clear error messages guide users to alternatives
+
+### Negative
+
+1. **Migration Effort**
+   - Existing code should eventually be migrated to the new imports (optional for now; old imports remain backward compatible)
+   - Documentation needs updating
+   - Users must learn new environment variables
+
+2. **Increased Complexity**
+   - Provider base class has more methods (embedding + generation)
+   - More environment variables to configure
+   - Capability detection adds runtime checks
+
+## Implementation
+
+### Files Created
+
+**New Provider Infrastructure:**
+- `nextcloud_mcp_server/providers/__init__.py`
+- `nextcloud_mcp_server/providers/base.py`
+- `nextcloud_mcp_server/providers/registry.py`
+- `nextcloud_mcp_server/providers/ollama.py`
+- `nextcloud_mcp_server/providers/anthropic.py`
+- `nextcloud_mcp_server/providers/bedrock.py`
+- `nextcloud_mcp_server/providers/simple.py`
+
+**Tests:**
+- `tests/unit/providers/__init__.py`
+- `tests/unit/providers/test_bedrock.py` (9 unit tests)
+
+**Documentation:**
+- `docs/ADR-015-unified-provider-architecture.md` (this file)
+
+### Files Modified
+
+**Backward Compatibility:**
+- `nextcloud_mcp_server/embedding/service.py` - Now wraps `get_provider()`
+- `tests/rag_evaluation/llm_providers.py` - Uses unified providers
+
+**Dependencies:**
+- `pyproject.toml` - Added `boto3>=1.35.0` to dev dependencies
+
+### Testing Results
+
+- **Unit Tests:** 127 passed (including 9 new Bedrock tests)
+- **Type Checking:** All checks passed (ty)
+- **Linting:** All checks passed (ruff)
+- **Backward Compatibility:** Verified - existing embedding tests work
+
+## Alternatives Considered
+
+### Alternative 1: Keep Separate Provider Systems
+
+**Pros:**
+- No refactoring needed
+- Simpler short-term
+
+**Cons:**
+- Bedrock would need to be implemented twice
+- Continued code duplication
+- No long-term scalability
+
+**Decision:** Rejected - technical debt would continue to grow
+
+### Alternative 2: Separate Embedding and Generation Providers
+
+Use composition instead of unified interface:
+```python
+class CombinedProvider:
+    def __init__(self, embedding: EmbeddingProvider, generation: LLMProvider):
+        self.embedding = embedding
+        self.generation = generation
+```
+
+**Pros:**
+- Clearer separation of concerns
+- Simpler individual providers
+
+**Cons:**
+- Bedrock and Ollama naturally do both - artificial separation
+- More complex configuration (two providers to configure)
+- More boilerplate code
+
+**Decision:** Rejected - unified interface better matches provider capabilities
+
+### Alternative 3: Plugin System
+
+Dynamic provider registration via entry points:
+```python
+# setup.py
+entry_points={
+    'nextcloud_mcp.providers': [
+        'ollama = nextcloud_mcp_server.providers.ollama:OllamaProvider',
+        'bedrock = nextcloud_mcp_server.providers.bedrock:BedrockProvider',
+    ]
+}
+```
+
+**Pros:**
+- Most extensible
+- Third-party providers possible
+
+**Cons:**
+- Over-engineered for current needs
+- Added complexity
+- No immediate benefit
+
+**Decision:** Deferred - can add later if needed
+
+## Future Work
+
+1. **Additional Providers**
+   - OpenAI (embeddings + generation)
+   - Cohere (embeddings + generation)
+   - Google Vertex AI
+   - Azure OpenAI
+
+2. **Provider Features**
+   - Streaming generation support
+   - Batch API optimization (when available)
+   - Model-specific optimizations
+   - Cost tracking and metrics
+
+3. **Configuration Improvements**
+   - Provider profiles (development, production)
+   - Model aliasing (e.g., "small", "large")
+   - Fallback provider chains
+
+4. **Testing**
+   - Integration tests with real Bedrock endpoints
+   - Performance benchmarking across providers
+   - Cost comparison analysis
+
+## References
+
+- [boto3 Bedrock Runtime Documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Amazon Bedrock User Guide](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html)
+- ADR-003: Vector Database and Semantic Search
+- ADR-008: MCP Sampling for Semantic Search
+- ADR-013: RAG Evaluation Framework
@@ -0,0 +1,492 @@
+# ADR-016: Smithery Stateless Deployment for Multi-User Public Nextcloud Instances
+
+**Status:** Proposed
+**Date:** 2025-01-22
+**Deciders:** Development Team
+**Related:** ADR-004 (OAuth), ADR-007 (Background Vector Sync), ADR-015 (Unified Provider)
+
+## Context
+
+[Smithery](https://smithery.ai) is a hosting platform and marketplace for MCP servers that provides:
+
+- **Discovery**: Marketplace listing for MCP servers
+- **Hosting**: Containerized deployment with auto-scaling
+- **Authentication UI**: OAuth flow presentation for users
+- **Session Configuration**: Per-user settings passed via URL parameters
+- **Observability**: Usage logs and monitoring
+
+### Current Architecture Limitations
+
+The current nextcloud-mcp-server architecture assumes a **self-hosted deployment** with:
+
+1. **Persistent Infrastructure**
+   - Qdrant vector database for semantic search
+   - Background sync worker for content indexing
+   - Refresh token storage for offline access
+
+2. **Single-Tenant Configuration**
+   - Environment variables configure one Nextcloud instance
+   - `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`
+   - Or OAuth with a single IdP
+
+3. **Stateful Operations**
+   - Vector sync maintains index state across requests
+   - Token storage persists between sessions
+
+### Smithery Hosting Constraints
+
+Smithery-hosted containers are **stateless by design**:
+
+- No persistent storage between requests
+- No background workers or cron jobs
+- No databases (Qdrant, Redis, etc.)
+- Containers may be recycled at any time
+- Configuration passed per-session via URL parameters
+
+### Opportunity
+
+Many users have **publicly accessible Nextcloud instances** and want to:
+
+1. Try the MCP server without self-hosting infrastructure
+2. Connect multiple users to different Nextcloud instances
+3. Use basic Nextcloud tools without semantic search
+4. Benefit from Smithery's discovery and OAuth UI
+
+## Decision
+
+Implement a **stateless deployment mode** for Smithery that:
+
+1. **Disables stateful features** (vector sync, semantic search)
+2. **Creates clients per-session** from Smithery configuration
+3. **Supports multiple Nextcloud instances** via session config
+4. **Provides a useful subset of tools** that work without infrastructure
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                    Smithery-Hosted Stateless Mode                        │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                          │
+│  MCP Client                    Smithery                                  │
+│  (Cursor, Claude)              Infrastructure                            │
+│        │                            │                                    │
+│        │ 1. Connect                 │                                    │
+│        ├───────────────────────────►│                                    │
+│        │                            │                                    │
+│        │ 2. Config UI               │                                    │
+│        │◄───────────────────────────┤  User enters:                      │
+│        │    (Smithery presents)     │  - nextcloud_url                   │
+│        │                            │  - auth_mode (app_password/oauth)  │
+│        │                            │  - credentials                     │
+│        │ 3. Tool call               │                                    │
+│        ├───────────────────────────►│                                    │
+│        │    + session config        │                                    │
+│        │                            │                                    │
+│        │                    ┌───────┴───────┐                            │
+│        │                    │  MCP Server   │                            │
+│        │                    │  Container    │                            │
+│        │                    │               │                            │
+│        │                    │ 4. Create     │                            │
+│        │                    │    client     │                            │
+│        │                    │    from       │                            │
+│        │                    │    config     │                            │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │                    │ 5. Call       │                            │
+│        │                    │    Nextcloud  │───────► User's Nextcloud   │
+│        │                    │    API        │         Instance           │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │ 6. Response        │ Return result │                            │
+│        │◄───────────────────┤               │                            │
+│        │                    └───────────────┘                            │
+│                                                                          │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+### Session Configuration Schema
+
+```python
+from pydantic import BaseModel, Field
+
+class SmitheryConfigSchema(BaseModel):
+    """Configuration schema for Smithery session."""
+
+    # Required: Nextcloud instance
+    nextcloud_url: str = Field(
+        ...,
+        description="Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+    )
+
+    # Authentication mode
+    auth_mode: str = Field(
+        "app_password",
+        description="Authentication method: 'app_password' or 'oauth'"
+    )
+
+    # App Password authentication (recommended for Smithery)
+    username: str | None = Field(
+        None,
+        description="Nextcloud username (required for app_password auth)"
+    )
+    app_password: str | None = Field(
+        None,
+        description="Nextcloud app password (Settings → Security → App passwords)"
+    )
+
+    # OAuth authentication (advanced)
+    # When auth_mode='oauth', Smithery handles the OAuth flow
+    # and passes the access token automatically
+```
+
+### Feature Matrix
+
+| Feature | Self-Hosted | Smithery Stateless |
+|---------|-------------|-------------------|
+| **Notes** | | |
+| List/Search notes | ✓ | ✓ |
+| Get/Create/Update notes | ✓ | ✓ |
+| Semantic search | ✓ | ✗ |
+| **Calendar** | | |
+| List calendars | ✓ | ✓ |
+| Get/Create events | ✓ | ✓ |
+| **Contacts** | | |
+| List address books | ✓ | ✓ |
+| Search/Get contacts | ✓ | ✓ |
+| **Files (WebDAV)** | | |
+| List/Download files | ✓ | ✓ |
+| Upload files | ✓ | ✓ |
+| Search files | ✓ | ✓ (keyword only) |
+| **Deck** | | |
+| List boards/cards | ✓ | ✓ |
+| Create/Update cards | ✓ | ✓ |
+| **Tables** | | |
+| List/Query tables | ✓ | ✓ |
+| Create/Update rows | ✓ | ✓ |
+| **Cookbook** | | |
+| List/Get recipes | ✓ | ✓ |
+| **Semantic Search** | | |
+| Vector search | ✓ | ✗ |
+| RAG answers | ✓ | ✗ |
+| **Background Sync** | | |
+| Auto-indexing | ✓ | ✗ |
+| Webhook sync | ✓ | ✗ |
+| **Admin UI (`/app`)** | | |
+| Vector sync status | ✓ | ✗ |
+| Vector visualization | ✓ | ✗ |
+| Webhook management | ✓ | ✗ |
+| Session management | ✓ | ✗ |
+
+### Implementation
+
+#### 1. Deployment Mode Detection
+
+```python
+# nextcloud_mcp_server/config.py
+
+class DeploymentMode(Enum):
+    SELF_HOSTED = "self_hosted"      # Full features, env-based config
+    SMITHERY_STATELESS = "smithery"  # Stateless, session-based config
+
+def get_deployment_mode() -> DeploymentMode:
+    """Detect deployment mode from environment."""
+    if os.getenv("SMITHERY_DEPLOYMENT") == "true":
+        return DeploymentMode.SMITHERY_STATELESS
+    return DeploymentMode.SELF_HOSTED
+```
+
+#### 2. Session-Based Client Factory
+
+```python
+# nextcloud_mcp_server/context.py
+
+async def get_client(ctx: Context) -> NextcloudClient:
+    """Get NextcloudClient - from session config or environment."""
+
+    mode = get_deployment_mode()
+
+    if mode == DeploymentMode.SMITHERY_STATELESS:
+        # Create client from Smithery session config
+        config = ctx.session_config
+        if not config:
+            raise McpError("Session configuration required")
+
+        return NextcloudClient(
+            base_url=config.nextcloud_url,
+            username=config.username,
+            password=config.app_password,
+        )
+    else:
+        # Existing behavior: from environment or OAuth context
+        return await _get_client_from_context(ctx)
+```
+
+#### 3. Conditional Tool Registration
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_mcp_server(mode: DeploymentMode) -> FastMCP:
+    """Create MCP server with mode-appropriate tools."""
+
+    mcp = FastMCP("Nextcloud MCP")
+
+    # Always register core tools
+    configure_notes_tools(mcp)
+    configure_calendar_tools(mcp)
+    configure_contacts_tools(mcp)
+    configure_webdav_tools(mcp)
+    configure_deck_tools(mcp)
+    configure_tables_tools(mcp)
+    configure_cookbook_tools(mcp)
+
+    # Only register stateful tools in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        configure_semantic_tools(mcp)  # Requires Qdrant
+        register_oauth_tools(mcp)       # Requires token storage
+
+    return mcp
+```
+
+#### 4. Exclude Admin UI Routes
+
+The `/app` admin UI should **not be installed** in Smithery mode because:
+
+- **Vector sync status** - No vector sync in stateless mode
+- **Vector visualization** - No Qdrant to visualize
+- **Webhook management** - No webhook sync without background workers
+- **Session management** - No persistent sessions to manage
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_app(mode: DeploymentMode) -> Starlette:
+    """Create Starlette app with mode-appropriate routes."""
+
+    routes = [
+        Route("/health/live", health_live, methods=["GET"]),
+        Route("/health/ready", health_ready, methods=["GET"]),
+    ]
+
+    # Only mount admin UI in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        browser_app = create_browser_app()
+        routes.append(
+            Route("/app", lambda r: RedirectResponse("/app/", status_code=307))
+        )
+        routes.append(Mount("/app", app=browser_app))
+        logger.info("Admin UI mounted at /app")
+    else:
+        logger.info("Admin UI disabled in Smithery stateless mode")
+
+    # Mount FastMCP at root
+    mcp_app = create_mcp_server(mode).streamable_http_app()
+    routes.append(Mount("/", app=mcp_app))
+
+    return Starlette(routes=routes, lifespan=starlette_lifespan)
+```
+
+**Endpoints by Mode:**
+
+| Endpoint | Self-Hosted | Smithery |
+|----------|-------------|----------|
+| `/mcp` | ✓ | ✓ |
+| `/health/live` | ✓ | ✓ |
+| `/health/ready` | ✓ | ✓ |
+| `/.well-known/mcp-config` | ✓ | ✓ |
+| `/app` | ✓ | ✗ |
+| `/app/vector-sync/status` | ✓ | ✗ |
+| `/app/vector-viz` | ✓ | ✗ |
+| `/app/webhooks` | ✓ | ✗ |
+
+#### 5. Smithery Integration Files
+
+**smithery.yaml:**
+```yaml
+runtime: "container"
+build:
+  dockerfile: "Dockerfile.smithery"
+  dockerBuildPath: "."
+startCommand:
+  type: "http"
+  configSchema:
+    type: "object"
+    required: ["nextcloud_url", "username", "app_password"]
+    properties:
+      nextcloud_url:
+        type: "string"
+        title: "Nextcloud URL"
+        description: "Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+      username:
+        type: "string"
+        title: "Username"
+        description: "Your Nextcloud username"
+      app_password:
+        type: "string"
+        title: "App Password"
+        description: "Generate at Settings → Security → App passwords"
+  exampleConfig:
+    nextcloud_url: "https://cloud.example.com"
+    username: "alice"
+    app_password: "xxxxx-xxxxx-xxxxx-xxxxx-xxxxx"
+```
+
+**Dockerfile.smithery:**
+```dockerfile
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
+
+# Copy project files
+COPY pyproject.toml uv.lock ./
+COPY nextcloud_mcp_server ./nextcloud_mcp_server
+
+# Install dependencies (without vector/semantic extras)
+RUN uv sync --frozen --no-dev
+
+# Set Smithery mode
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081
+EXPOSE 8081
+
+CMD ["uv", "run", "python", "-m", "nextcloud_mcp_server.smithery_main"]
+```
+
+**nextcloud_mcp_server/smithery_main.py:**
+```python
+"""Smithery-specific entrypoint for stateless deployment."""
+
+import os
+import uvicorn
+from starlette.middleware.cors import CORSMiddleware
+
+from nextcloud_mcp_server.app import create_mcp_server
+from nextcloud_mcp_server.config import DeploymentMode
+
+def main():
+    # Force stateless mode
+    os.environ["SMITHERY_DEPLOYMENT"] = "true"
+    os.environ["VECTOR_SYNC_ENABLED"] = "false"
+
+    mcp = create_mcp_server(DeploymentMode.SMITHERY_STATELESS)
+    app = mcp.streamable_http_app()
+
+    # Add CORS for browser-based clients
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["GET", "POST", "OPTIONS"],
+        allow_headers=["*"],
+        expose_headers=["mcp-session-id", "mcp-protocol-version"],
+    )
+
+    # Smithery sets PORT environment variable
+    port = int(os.environ.get("PORT", 8081))
+    uvicorn.run(app, host="0.0.0.0", port=port)
+
+if __name__ == "__main__":
+    main()
+```
+
+### Security Considerations
+
+1. **App Passwords over User Passwords**
+   - Smithery config encourages app passwords (revocable, scoped)
+   - Documentation guides users to create dedicated app passwords
+   - App passwords can be revoked without changing main password
+
+2. **HTTPS Required**
+   - `nextcloud_url` must be HTTPS for production use
+   - Validation rejects HTTP URLs in Smithery mode
+
+3. **No Credential Storage**
+   - Credentials exist only for request duration
+   - No server-side persistence of user credentials
+   - Smithery handles secure config transmission
+
+4. **Scope Limitation**
+   - Stateless mode cannot use the `offline_access` scope (no refresh tokens are stored)
+   - No background operations on user's behalf
+   - Clear user expectation: tools work during session only
+
+### Migration Path
+
+Users can start with Smithery stateless mode and migrate to self-hosted:
+
+1. **Try on Smithery** → Basic tools, no setup
+2. **Self-host for semantic search** → Add Qdrant, enable vector sync
+3. **Full deployment** → Background sync, webhooks, multi-user OAuth
+
+## Consequences
+
+### Positive
+
+1. **Lower barrier to entry** - Users can try without infrastructure
+2. **Multi-user support** - Each session connects to different Nextcloud
+3. **Smithery ecosystem** - Discovery, observability, OAuth UI
+4. **Clear feature tiers** - Stateless (simple) vs self-hosted (full)
+
+### Negative
+
+1. **No semantic search** - Key differentiator unavailable on Smithery
+2. **Per-request auth** - Credentials sent with each request
+3. **No offline access** - Cannot perform background operations
+4. **Maintenance burden** - Two deployment modes to support
+
+### Neutral
+
+1. **Feature subset** - May encourage users to self-host for full features
+2. **Documentation needs** - Clear guidance on mode differences required
+
+## Alternatives Considered
+
+### 1. External MCP Only
+
+**Approach:** Only support self-hosted external MCP registration on Smithery.
+
+**Rejected because:**
+- Higher barrier to entry for new users
+- Misses opportunity for Smithery marketplace visibility
+- Users want to try before committing to infrastructure
+
+### 2. Embedded Vector DB (SQLite-vec)
+
+**Approach:** Use SQLite with vector extensions for per-request indexing.
+
+**Rejected because:**
+- No persistence between requests anyway
+- Indexing latency too high for synchronous requests
+- Complexity without benefit in stateless context
+
+### 3. External Vector DB Service
+
+**Approach:** Connect to Pinecone/Weaviate Cloud from Smithery container.
+
+**Rejected because:**
+- Adds external dependency and cost
+- Per-user collections require complex multi-tenancy
+- Sync still impossible without background workers
+
+### 4. Hybrid: Smithery + User's Qdrant
+
+**Approach:** User provides their own Qdrant URL in session config.
+
+**Considered for future:**
+- Could enable semantic search for advanced users
+- Adds complexity to session config
+- Sync still requires external trigger (manual or webhook)
+
+## References
+
+- [Smithery Documentation](https://smithery.ai/docs)
+- [Smithery Session Configuration](https://smithery.ai/docs/build/session-config)
+- [Smithery External MCPs](https://smithery.ai/docs/build/external)
+- [MCP Streamable HTTP Transport](https://modelcontextprotocol.io/docs/concepts/transports)
+- [Nextcloud App Passwords](https://docs.nextcloud.com/server/latest/user_manual/en/session_management.html#app-passwords)
@@ -0,0 +1,338 @@
+# Amazon Bedrock Setup Guide
+
+This guide covers how to configure the Nextcloud MCP Server to use Amazon Bedrock for embeddings and text generation.
+
+## Prerequisites
+
+1. **AWS Account** with access to Amazon Bedrock
+2. **boto3 library** installed: `pip install boto3` or `uv sync --group dev`
+3. **Model Access** - Request access to models in AWS Bedrock console
+
+## Required AWS Permissions
+
+### IAM Policy for Bedrock Access
+
+The AWS IAM user or role needs the following permissions:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockInvokeModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel",
+        "bedrock:InvokeModelWithResponseStream"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+### Minimal Permissions (Production)
+
+For production deployments, restrict to specific models:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockEmbeddings",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v2:0"
+      ]
+    },
+    {
+      "Sid": "BedrockGeneration",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0"
+      ]
+    }
+  ]
+}
+```
+
+### Additional Permissions (Optional)
+
+For advanced use cases:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockListModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:ListFoundationModels",
+        "bedrock:GetFoundationModel"
+      ],
+      "Resource": "*"
+    },
+    {
+      "Sid": "BedrockAsyncInvoke",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModelAsync",
+        "bedrock:GetAsyncInvoke",
+        "bedrock:ListAsyncInvokes"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+## Model Access
+
+Before using Bedrock models, you must request access in the AWS Console:
+
+1. Navigate to **Amazon Bedrock** → **Model access**
+2. Click **Manage model access**
+3. Select models you want to use:
+   - **Embeddings:** Amazon Titan Embed Text, Cohere Embed
+   - **Text Generation:** Anthropic Claude, Meta Llama, Amazon Titan Text
+4. Click **Request model access**
+5. Wait for approval (usually instant for most models)
+
+## Supported Models
+
+### Embedding Models
+
+| Provider | Model ID | Dimensions | Best For |
+|----------|----------|------------|----------|
+| Amazon Titan | `amazon.titan-embed-text-v1` | 1,536 | General purpose |
+| Amazon Titan | `amazon.titan-embed-text-v2:0` | 1,024 | Latest, improved quality |
+| Cohere | `cohere.embed-english-v3` | 1,024 | English text |
+| Cohere | `cohere.embed-multilingual-v3` | 1,024 | Multilingual |
+
+### Text Generation Models
+
+| Provider | Model ID | Context | Best For |
+|----------|----------|---------|----------|
+| Anthropic | `anthropic.claude-3-sonnet-20240229-v1:0` | 200K | Balanced performance |
+| Anthropic | `anthropic.claude-3-haiku-20240307-v1:0` | 200K | Fast, cost-effective |
+| Anthropic | `anthropic.claude-3-opus-20240229-v1:0` | 200K | Highest quality |
+| Meta | `meta.llama3-8b-instruct-v1:0` | 8K | Fast, open-source |
+| Meta | `meta.llama3-70b-instruct-v1:0` | 8K | High quality |
+| Amazon | `amazon.titan-text-express-v1` | 8K | Fast, low cost |
+| Mistral | `mistral.mistral-7b-instruct-v0:2` | 32K | Efficient |
+
+## Configuration
+
+### Environment Variables
+
+**Required:**
+```bash
+AWS_REGION=us-east-1
+```
+
+**Model selection (set at least one):**
+```bash
+# For embeddings
+BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+
+# For text generation (RAG evaluation)
+BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+```
+
+**AWS Credentials (choose one method):**
+
+**Method 1: Environment Variables**
+```bash
+AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
+AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 2: AWS Credentials File** (`~/.aws/credentials`)
+```ini
+[default]
+aws_access_key_id = AKIAIOSFODNN7EXAMPLE
+aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 3: IAM Role** (when running on AWS EC2/ECS/Lambda)
+- No credentials needed, uses instance/task role automatically
+
+### Docker Configuration
+
+Add to your `docker-compose.yml`:
+
+```yaml
+services:
+  mcp:
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+      - BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+```
+
+Or use AWS credentials file volume mount:
+
+```yaml
+services:
+  mcp:
+    volumes:
+      - ~/.aws:/root/.aws:ro
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+```
+
+## Usage Examples
+
+### Embeddings Only
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export AWS_ACCESS_KEY_ID=your-key
+export AWS_SECRET_ACCESS_KEY=your-secret
+
+uv run nextcloud-mcp-server
+```
+
+### Both Embeddings and Generation
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+# For RAG evaluation with Bedrock
+export RAG_EVAL_PROVIDER=bedrock
+export RAG_EVAL_BEDROCK_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+uv run python -m tests.rag_evaluation.evaluate
+```
+
+### Programmatic Usage
+
+```python
+from nextcloud_mcp_server.providers import BedrockProvider
+
+# Embeddings only
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+)
+
+embeddings = await provider.embed_batch(["text1", "text2"])
+
+# Both capabilities
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+    generation_model="anthropic.claude-3-sonnet-20240229-v1:0",
+)
+
+# Generate embeddings
+embedding = await provider.embed("query text")
+
+# Generate text
+response = await provider.generate("Write a summary", max_tokens=500)
+```
+
+## Cost Considerations
+
+### Embedding Costs (as of Jan 2025)
+
+| Model | Price per 1K tokens |
+|-------|---------------------|
+| Titan Embed Text v2 | $0.00002 |
+| Cohere Embed English v3 | $0.0001 |
+
+### Generation Costs (as of Jan 2025)
+
+| Model | Input (per 1K tokens) | Output (per 1K tokens) |
+|-------|----------------------|------------------------|
+| Claude 3 Haiku | $0.00025 | $0.00125 |
+| Claude 3 Sonnet | $0.003 | $0.015 |
+| Claude 3 Opus | $0.015 | $0.075 |
+| Llama 3 8B | $0.0003 | $0.0006 |
+| Titan Text Express | $0.0002 | $0.0006 |
+
+**Note:** Prices vary by region. Check [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/) for current rates.
+
+## Troubleshooting
+
+### Error: "No module named 'boto3'" or boto3 not found
+
+**Solution:**
+```bash
+uv sync --group dev  # Installs boto3
+```
+
+### Error: "AccessDeniedException"
+
+**Causes:**
+1. IAM permissions missing
+2. Model access not requested
+3. Wrong AWS region
+
+**Solution:**
+1. Verify IAM policy includes `bedrock:InvokeModel`
+2. Request model access in Bedrock console
+3. Check model is available in your region
+
+### Error: "ResourceNotFoundException"
+
+**Cause:** Invalid model ID or model not available in region
+
+**Solution:**
+- Verify model ID matches exactly (case-sensitive)
+- Check model availability in your AWS region
+- Use `aws bedrock list-foundation-models` to see available models
+
+### Error: "ThrottlingException"
+
+**Cause:** Rate limit exceeded
+
+**Solution:**
+- Reduce request rate
+- Request quota increase via AWS Support
+- Use batch operations where possible
+
+## Security Best Practices
+
+1. **Use IAM Roles** when running on AWS infrastructure
+2. **Rotate Access Keys** regularly if using IAM users
+3. **Restrict Permissions** to only required models
+4. **Enable CloudTrail** for audit logging
+5. **Use AWS Secrets Manager** for credential management
+6. **Monitor Costs** with AWS Cost Explorer and Budgets
+
+## Regional Availability
+
+Amazon Bedrock is available in several AWS regions, including:
+- **US East (N. Virginia)**: `us-east-1` ✅ Most models
+- **US West (Oregon)**: `us-west-2` ✅ Most models
+- **Asia Pacific (Singapore)**: `ap-southeast-1`
+- **Asia Pacific (Tokyo)**: `ap-northeast-1`
+- **Europe (Frankfurt)**: `eu-central-1`
+
+**Note:** Model availability varies by region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) for current availability.
+
+## References
+
+- [AWS Bedrock Documentation](https://docs.aws.amazon.com/bedrock/)
+- [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/)
+- [boto3 Bedrock Runtime API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Provider Architecture ADR](./ADR-015-unified-provider-architecture.md)
@@ -243,7 +243,7 @@ If you see cardinality warnings:
 The observability stack integrates at multiple layers:

 1. **HTTP Layer**: `ObservabilityMiddleware` tracks all HTTP requests
-2. **MCP Layer**: Tools use `@trace_mcp_tool` for span creation
+2. **MCP Layer**: Tools use `@instrument_tool` for automatic metrics and trace span creation
 3. **Client Layer**: `BaseNextcloudClient` tracks all API calls
 4. **OAuth Layer**: Token operations are traced and metered
 5. **Background Tasks**: Vector sync operations emit metrics/traces
@@ -0,0 +1,93 @@
+# Vector Sync UI Guide
+
+This guide covers the browser-based interface for the Nextcloud MCP Server's semantic search and vector synchronization features.
+
+## Overview
+
+The Vector Sync UI (`/app`) provides an interactive interface to test semantic search queries and visualize results from your Nextcloud documents. It exposes the same retrieval capabilities that LLMs use in Retrieval-Augmented Generation (RAG) workflows, powered by Alpine.js for reactive state, htmx for dynamic updates, and Plotly.js for 3D visualization.
+
+**Supported Apps**: Notes, Files (text/PDF), Calendar (events/tasks), Contacts (CardDAV), and Deck are indexed and searchable.
+
+## Accessing the UI
+
+Navigate to `/app` after authentication:
+- **BasicAuth mode**: `http://localhost:8000/app` (uses credentials from environment)
+- **OAuth mode**: `http://localhost:8000/app` (redirects to login if not authenticated)
+
+## Tabs
+
+### Welcome Page
+
+Landing page that introduces semantic search and RAG workflows. Shows authentication status, explains how vector embeddings work, and provides feature navigation. Adapts content based on whether `VECTOR_SYNC_ENABLED=true`.
+
+### User Info
+
+Displays authentication details and session information:
+- **BasicAuth**: Username, mode badge, Nextcloud host
+- **OAuth**: Username, session ID (truncated), background access status, IdP profile, revocation option
+
+### Vector Sync Status
+
+Real-time monitoring of document indexing:
+- **Indexed Documents**: Total chunks stored in Qdrant vector database (immediately searchable)
+- **Pending Documents**: Queue awaiting embedding processing
+- **Status**: "✓ Idle" (green) when up-to-date, "⟳ Syncing" (orange) during processing
+
+Auto-refreshes every 10 seconds via htmx. Check this tab after adding content to verify indexing completion.
+
+### Vector Visualization
+
+Interactive search interface with 3D PCA plot of semantic space.
+
+**Search Controls**:
+- **Query**: Natural language search (e.g., "health benefits of coffee")
+- **Algorithm**: Semantic (Dense) for pure vector search, or BM25 Hybrid (default) combining vectors + keywords
+- **Fusion** (Hybrid only): RRF (Reciprocal Rank Fusion) or DBSF (Distribution-Based Score Fusion)
+- **Advanced**: Filter by document type, adjust score threshold (0.0-1.0), set result limit (max 100)
+
+**3D Visualization**:
+
+The plot uses Principal Component Analysis (PCA) to reduce the high-dimensional embeddings (e.g., 768 dimensions; the exact size depends on the embedding model) to 3D. Documents are positioned by semantic similarity, with the query point shown in red. Point size and opacity indicate relevance, and the Viridis color scale shows relative scores (yellow = highest match).
+
+**Normalization**: Vectors are L2-normalized before PCA to match Qdrant's cosine distance, so the query point is positioned accurately near similar documents. Without normalization, differences in vector magnitude would cause misleading spatial separation.
+
+**Results List**:
+
+Each result shows document title (clickable link to Nextcloud), excerpt, raw score, relative percentage, and document type. Click "Show Chunk" to view the matched text segment with surrounding context (up to 500 characters before/after).
+
+## Configuration
+
+**Required**:
+```bash
+VECTOR_SYNC_ENABLED=true
+```
+
+**Optional** (for browser-accessible links):
+```bash
+NEXTCLOUD_PUBLIC_ISSUER_URL=https://your-public-nextcloud-url.com
+```
+
+**Admin Access**: The Webhooks tab is only visible to Nextcloud admins (verified via the Provisioning API).
+
+## Use Cases
+
+**Testing Search Queries**: Preview results before they reach LLMs in RAG workflows. Compare semantic vs. hybrid algorithms, verify relevance scores, and validate that correct documents are retrieved. Use chunk context to see exactly which text segments match and why unexpected documents appear.
+
+**Monitoring Indexing**: Track real-time progress after creating or modifying documents. Check if the queue is backing up (high pending count) or confirm the system is idle after bulk imports. Verify documents become searchable immediately after indexing completes.
+
+**Algorithm Comparison**: Pure semantic search excels at conceptual queries and synonyms. BM25 hybrid combines semantic understanding with precise keyword matching for better accuracy on specific terms. Experiment with RRF vs. DBSF fusion for different score distributions.
+
+## Troubleshooting
+
+**Vector Sync Tab Not Visible**: Set `VECTOR_SYNC_ENABLED=true` and restart the server.
+
+**No Search Results**: Check Vector Sync Status to confirm documents are indexed (not just pending). Try broader queries or lower the score threshold in Advanced options. Initial indexing may take time depending on document volume.
+
+**Links to Nextcloud Apps Not Working**: Set `NEXTCLOUD_PUBLIC_ISSUER_URL` to your browser-accessible Nextcloud URL for correct link generation.
+
+## Related Documentation
+
+- [Configuration Guide](../configuration.md) - Environment variables and settings
+- [Authentication Modes](../authentication.md) - BasicAuth vs OAuth setup
+- [Installation Guide](../installation.md) - Getting started
+- [ADR-008: MCP Sampling for RAG](../ADR-008-mcp-sampling-for-rag.md) - Technical details on RAG workflows
@@ -3,6 +3,7 @@ import os
 import time
 from collections.abc import AsyncIterator
 from contextlib import AsyncExitStack, asynccontextmanager
+from contextvars import ContextVar
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Optional

@@ -24,6 +25,9 @@ from starlette.middleware.authentication import AuthenticationMiddleware
 from starlette.middleware.cors import CORSMiddleware
 from starlette.responses import JSONResponse, RedirectResponse
 from starlette.routing import Mount, Route
+from starlette.staticfiles import StaticFiles
+from starlette.types import ASGIApp, Receive, Send
+from starlette.types import Scope as StarletteScope

 from nextcloud_mcp_server.auth import (
    InsufficientScopeError,
@@ -35,6 +39,8 @@ from nextcloud_mcp_server.auth import (
 from nextcloud_mcp_server.auth.unified_verifier import UnifiedTokenVerifier
 from nextcloud_mcp_server.client import NextcloudClient
 from nextcloud_mcp_server.config import (
+    DeploymentMode,
+    get_deployment_mode,
    get_document_processor_config,
    get_settings,
 )
@@ -121,6 +127,26 @@ def initialize_document_processors():
        except Exception as e:
            logger.warning(f"Failed to register Tesseract processor: {e}")

+    # Register PyMuPDF processor (high priority, local, no API required)
+    if "pymupdf" in config["processors"]:
+        pymupdf_config = config["processors"]["pymupdf"]
+        try:
+            from nextcloud_mcp_server.document_processors.pymupdf import (
+                PyMuPDFProcessor,
+            )
+
+            processor = PyMuPDFProcessor(
+                extract_images=pymupdf_config.get("extract_images", True),
+                image_dir=pymupdf_config.get("image_dir"),
+            )
+            registry.register(processor, priority=15)  # Higher than unstructured
+            logger.info(
+                f"Registered PyMuPDF processor: extract_images={pymupdf_config.get('extract_images', True)}"
+            )
+            registered_count += 1
+        except Exception as e:
+            logger.warning(f"Failed to register PyMuPDF processor: {e}")
+
    # Register custom processor
    if "custom" in config["processors"]:
        custom_config = config["processors"]["custom"]
@@ -217,6 +243,25 @@ def validate_pkce_support(discovery: dict, discovery_url: str) -> None:
    click.echo(f"✓ PKCE support validated: {code_challenge_methods}")


+@dataclass
+class VectorSyncState:
+    """
+    Module-level state for vector sync background tasks.
+
+    This singleton bridges the Starlette server lifespan (where background tasks run)
+    and FastMCP session lifespans (where MCP tools need access to the streams).
+    """
+
+    document_send_stream: Optional[MemoryObjectSendStream] = None  # send end of the document stream; None until populated
+    document_receive_stream: Optional[MemoryObjectReceiveStream] = None  # receive end of the document stream; None until populated
+    shutdown_event: Optional[anyio.Event] = None  # presumably set to tell background tasks to stop - confirm in starlette_lifespan
+    scanner_wake_event: Optional[anyio.Event] = None  # presumably wakes the scanner task early - confirm against scanner_task
+
+
+# Module-level singleton for vector sync state
+_vector_sync_state = VectorSyncState()
+
+
@dataclass
 class AppContext:
    """Application context for BasicAuth mode."""
@@ -243,17 +288,160 @@ class OAuthAppContext:
    )


+@dataclass
+class SmitheryAppContext:
+    """Application context for Smithery stateless mode.
+
+    ADR-016: No shared client - clients created per-request from session config.
+    """
+
+    pass  # No shared state needed - everything comes from session config
+
+
+# ADR-016: Smithery config schema for container runtime
+# This schema is served at /.well-known/mcp-config for Smithery discovery
+# See: https://smithery.ai/docs/build/session-config
+SMITHERY_CONFIG_SCHEMA = {
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "$id": "https://server.smithery.ai/nextcloud-mcp-server/.well-known/mcp-config",
+    "title": "Nextcloud MCP Server Configuration",
+    "description": "Configuration for connecting to your Nextcloud instance via app password authentication",
+    "x-query-style": "flat",  # Our schema has no nested objects, so flat style works
+    "type": "object",
+    "required": ["nextcloud_url", "username", "app_password"],
+    "properties": {
+        "nextcloud_url": {
+            "type": "string",
+            "title": "Nextcloud URL",
+            "description": "Your Nextcloud instance URL (e.g., https://cloud.example.com). Must be publicly accessible.",
+            "pattern": "^https?://.+",
+        },
+        "username": {
+            "type": "string",
+            "title": "Username",
+            "description": "Your Nextcloud username",
+            "minLength": 1,
+        },
+        "app_password": {
+            "type": "string",
+            "title": "App Password",
+            "description": "Nextcloud app password. Generate at Settings > Security > App passwords. Do NOT use your main password.",
+            "minLength": 1,
+        },
+    },
+    "additionalProperties": False,
+}
+
+
+# ADR-016: Context variable to hold Smithery session config per-request
+# This is set by SmitheryConfigMiddleware and accessed in context.py
+# default=None so .get() cannot raise LookupError in a fresh context/thread
+_smithery_session_config: ContextVar[dict[str, str] | None] = ContextVar(
+    "smithery_session_config", default=None
+)
+
+
+def get_smithery_session_config() -> dict[str, str] | None:
+    """Return the Smithery session config stored for the current context.
+
+    Used by context.py to access config extracted from URL query parameters.
+    The value is None when SmitheryConfigMiddleware stored no config.
+    """
+    return _smithery_session_config.get()
+
+
+class SmitheryConfigMiddleware:
+    """ASGI middleware that extracts Smithery config from URL query parameters.
+
+    ADR-016: For container runtime, Smithery passes configuration as URL query
+    parameters to the /mcp endpoint. This middleware extracts those parameters
+    and stores them in a context variable for access in tools.
+
+    Configuration parameters:
+    - nextcloud_url: Nextcloud instance URL
+    - username: Nextcloud username
+    - app_password: Nextcloud app password
+
+    The extracted config is stored in a ContextVar and can be accessed via
+    get_smithery_session_config() in context.py. The variable is set back to
+    None once the wrapped app returns or raises.
+    """
+
+    # Query parameters recognised as session config (the schema is flat)
+    _CONFIG_KEYS = ("nextcloud_url", "username", "app_password")
+
+    def __init__(self, app: ASGIApp):
+        self.app = app
+
+    async def __call__(
+        self, scope: StarletteScope, receive: Receive, send: Send
+    ) -> None:
+        if scope["type"] == "http":
+            from urllib.parse import parse_qs
+
+            # errors="replace" mirrors parse_qs's own decoding policy, so a
+            # query string with invalid UTF-8 bytes cannot abort the request
+            # with UnicodeDecodeError before the wrapped app is reached.
+            query_string = scope.get("query_string", b"").decode(
+                "utf-8", errors="replace"
+            )
+            params = parse_qs(query_string)
+
+            # parse_qs returns a list per key; only the first value is used
+            session_config = {
+                key: params[key][0] for key in self._CONFIG_KEYS if key in params
+            }
+
+            # Store in context variable for access by context.py
+            if session_config:
+                _smithery_session_config.set(session_config)
+                # app_password is intentionally not part of the log line
+                logger.debug(
+                    f"Smithery config extracted: nextcloud_url={session_config.get('nextcloud_url')}, "
+                    f"username={session_config.get('username')}"
+                )
+
+        try:
+            await self.app(scope, receive, send)
+        finally:
+            # Clear context variable after request
+            _smithery_session_config.set(None)
+
+
+@asynccontextmanager
+async def app_lifespan_smithery(server: FastMCP) -> AsyncIterator[SmitheryAppContext]:
+    """
+    Manage application lifecycle for Smithery stateless mode.
+
+    ADR-016: Minimal lifespan with no shared state.
+    - No shared Nextcloud client (created per-request from session config)
+    - No vector sync (disabled in Smithery mode)
+    - No persistent storage (stateless deployment)
+    - No document processors (not enabled in Smithery mode)
+
+    Args:
+        server: The FastMCP instance this lifespan is attached to (not used
+            in the body).
+
+    Yields:
+        An empty SmitheryAppContext.
+    """
+    logger.info("Starting MCP server in Smithery stateless mode")
+    logger.info("Clients will be created per-request from session config")
+
+    try:
+        yield SmitheryAppContext()
+    finally:
+        logger.info("Shutting down Smithery stateless mode")
+
+
 def is_oauth_mode() -> bool:
    """
    Determine if OAuth mode should be used.

    OAuth mode is enabled when:
    - NEXTCLOUD_USERNAME and NEXTCLOUD_PASSWORD are NOT set
+    - AND we are NOT in Smithery stateless mode
    - Or explicitly enabled via configuration

    Returns:
        True if OAuth mode, False if BasicAuth mode
    """
+    # ADR-016: Smithery stateless mode uses per-request BasicAuth from session config
+    # It's not OAuth mode even though env credentials aren't set
+    deployment_mode = get_deployment_mode()
+    if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
+        logger.info(
+            "BasicAuth mode (Smithery stateless - credentials from session config)"
+        )
+        return False
+
    username = os.getenv("NEXTCLOUD_USERNAME")
    password = os.getenv("NEXTCLOUD_PASSWORD")

@@ -386,15 +574,15 @@ async def load_oauth_client_credentials(
@asynccontextmanager
 async def app_lifespan_basic(server: FastMCP) -> AsyncIterator[AppContext]:
    """
-    Manage application lifecycle for BasicAuth mode.
+    Manage application lifecycle for BasicAuth mode (FastMCP session lifespan).

    Creates a single Nextcloud client with basic authentication
-    that is shared across all requests.
+    that is shared across all requests within a session.

-    If vector sync is enabled (VECTOR_SYNC_ENABLED=true), also starts
-    background tasks for automatic document indexing (ADR-007).
+    Note: Background tasks (scanner, processor) are started at server level
+    in starlette_lifespan, not here. This lifespan runs per-session.
    """
-    logger.info("Starting MCP server in BasicAuth mode")
+    logger.info("Starting MCP session in BasicAuth mode")
    logger.info("Creating Nextcloud client with BasicAuth")

    client = NextcloudClient.from_env()
@@ -410,91 +598,20 @@ async def app_lifespan_basic(server: FastMCP) -> AsyncIterator[AppContext]:
    # Initialize document processors
    initialize_document_processors()

-    settings = get_settings()
-
-    # Check if vector sync is enabled
-    if settings.vector_sync_enabled:
-        logger.info("Vector sync enabled - starting background tasks")
-
-        # Get username from environment for BasicAuth mode
-        username = os.getenv("NEXTCLOUD_USERNAME")
-        if not username:
-            raise ValueError(
-                "NEXTCLOUD_USERNAME is required for vector sync in BasicAuth mode"
-            )
-
-        # Initialize Qdrant collection before starting background tasks
-        logger.info("Initializing Qdrant collection...")
-        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
-
-        try:
-            await get_qdrant_client()  # Triggers collection creation if needed
-            logger.info("Qdrant collection ready")
-        except Exception as e:
-            logger.error(f"Failed to initialize Qdrant collection: {e}")
-            raise RuntimeError(
-                f"Cannot start vector sync - Qdrant initialization failed: {e}"
-            ) from e
-
-        # Initialize shared state
-        send_stream, receive_stream = anyio.create_memory_object_stream(
-            max_buffer_size=settings.vector_sync_queue_max_size
+    # Yield client context - scanner runs at server level (starlette_lifespan)
+    # Include vector sync state from module singleton (set by starlette_lifespan)
+    try:
+        yield AppContext(
+            client=client,
+            storage=storage,
+            document_send_stream=_vector_sync_state.document_send_stream,
+            document_receive_stream=_vector_sync_state.document_receive_stream,
+            shutdown_event=_vector_sync_state.shutdown_event,
+            scanner_wake_event=_vector_sync_state.scanner_wake_event,
        )
-        shutdown_event = anyio.Event()
-        scanner_wake_event = anyio.Event()
-
-        # Start background tasks using anyio TaskGroup
-        async with anyio.create_task_group() as tg:
-            # Start scanner task
-            tg.start_soon(
-                scanner_task,
-                send_stream,
-                shutdown_event,
-                scanner_wake_event,
-                client,
-                username,
-            )
-
-            # Start processor pool (each gets a cloned receive stream)
-            for i in range(settings.vector_sync_processor_workers):
-                tg.start_soon(
-                    processor_task,
-                    i,
-                    receive_stream.clone(),
-                    shutdown_event,
-                    client,
-                    username,
-                )
-
-            logger.info(
-                f"Background sync tasks started: 1 scanner + {settings.vector_sync_processor_workers} processors"
-            )
-
-            # Yield with background tasks running
-            try:
-                yield AppContext(
-                    client=client,
-                    storage=storage,
-                    document_send_stream=send_stream,
-                    document_receive_stream=receive_stream,
-                    shutdown_event=shutdown_event,
-                    scanner_wake_event=scanner_wake_event,
-                )
-            finally:
-                # Shutdown signal
-                logger.info("Shutting down background sync tasks")
-                shutdown_event.set()
-
-                # TaskGroup automatically cancels all tasks on exit
-                logger.info("Background sync tasks stopped")
-                await client.close()
-    else:
-        # No vector sync - simple lifecycle
-        try:
-            yield AppContext(client=client, storage=storage)
-        finally:
-            logger.info("Shutting down BasicAuth mode")
-            await client.close()
+    finally:
+        logger.info("Shutting down BasicAuth session")
+        await client.close()


 async def setup_oauth_config():
@@ -507,9 +624,9 @@ async def setup_oauth_config():
    - External IdP mode: OIDC_DISCOVERY_URL points to external provider
      → External IdP for OAuth, Nextcloud user_oidc validates tokens and provides API access

-    Uses generic OIDC environment variables:
+    Uses OIDC environment variables:
    - OIDC_DISCOVERY_URL: OIDC discovery endpoint (optional, defaults to NEXTCLOUD_HOST)
-    - OIDC_CLIENT_ID / OIDC_CLIENT_SECRET: Static credentials (optional, uses DCR if not provided)
+    - NEXTCLOUD_OIDC_CLIENT_ID / NEXTCLOUD_OIDC_CLIENT_SECRET: Static credentials (optional, uses DCR if not provided)
    - NEXTCLOUD_OIDC_SCOPES: Requested OAuth scopes

    This is done synchronously before FastMCP initialization because FastMCP
@@ -633,19 +750,21 @@ async def setup_oauth_config():
            )

    # Load client credentials (static or dynamic registration)
-    client_id = os.getenv("OIDC_CLIENT_ID")
-    client_secret = os.getenv("OIDC_CLIENT_SECRET")
+    client_id = os.getenv("NEXTCLOUD_OIDC_CLIENT_ID")
+    client_secret = os.getenv("NEXTCLOUD_OIDC_CLIENT_SECRET")

    if client_id and client_secret:
        logger.info(f"Using static OIDC client credentials: {client_id}")
    elif registration_endpoint:
-        logger.info("OIDC_CLIENT_ID not set, attempting Dynamic Client Registration")
+        logger.info(
+            "NEXTCLOUD_OIDC_CLIENT_ID not set, attempting Dynamic Client Registration"
+        )
        client_id, client_secret = await load_oauth_client_credentials(
            nextcloud_host=nextcloud_host, registration_endpoint=registration_endpoint
        )
    else:
        raise ValueError(
-            "OIDC_CLIENT_ID and OIDC_CLIENT_SECRET environment variables are required "
+            "NEXTCLOUD_OIDC_CLIENT_ID and NEXTCLOUD_OIDC_CLIENT_SECRET environment variables are required "
            "when the OIDC provider does not support Dynamic Client Registration. "
            f"Discovery URL: {discovery_url}"
        )
@@ -808,7 +927,7 @@ async def setup_oauth_config():
    )


-def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
+def get_app(transport: str = "streamable-http", enabled_apps: list[str] | None = None):
    # Initialize observability (logging will be configured by uvicorn)
    settings = get_settings()

@@ -835,8 +954,9 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
            "OpenTelemetry tracing disabled (set OTEL_EXPORTER_OTLP_ENDPOINT to enable)"
        )

-    # Determine authentication mode
+    # Determine authentication mode and deployment mode
    oauth_enabled = is_oauth_mode()
+    deployment_mode = get_deployment_mode()

    if oauth_enabled:
        logger.info("Configuring MCP server for OAuth mode")
@@ -897,8 +1017,17 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
            auth=auth_settings,
        )
    else:
-        logger.info("Configuring MCP server for BasicAuth mode")
-        mcp = FastMCP("Nextcloud MCP", lifespan=app_lifespan_basic)
+        # ADR-016: Use Smithery lifespan for stateless mode, BasicAuth otherwise
+        if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
+            logger.info("Configuring MCP server for Smithery stateless mode")
+            # json_response=True returns plain JSON-RPC instead of SSE format,
+            # required for Smithery scanner compatibility
+            mcp = FastMCP(
+                "Nextcloud MCP", lifespan=app_lifespan_smithery, json_response=True
+            )
+        else:
+            logger.info("Configuring MCP server for BasicAuth mode")
+            mcp = FastMCP("Nextcloud MCP", lifespan=app_lifespan_basic)

    @mcp.resource("nc://capabilities")
    async def nc_get_capabilities():
@@ -934,8 +1063,12 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
            )

    # Register semantic search tools (cross-app feature)
+    # ADR-016: Skip in Smithery stateless mode (no vector database)
    settings = get_settings()
-    if settings.vector_sync_enabled:
+    deployment_mode = get_deployment_mode()
+    if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
+        logger.info("Skipping semantic search tools (Smithery stateless mode)")
+    elif settings.vector_sync_enabled:
        logger.info("Configuring semantic search tools (vector sync enabled)")
        configure_semantic_tools(mcp)
    else:
@@ -1012,180 +1145,177 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
            "Dynamic tool filtering enabled for OAuth mode (JWT and Bearer tokens)"
        )

-    if transport == "sse":
-        mcp_app = mcp.sse_app()
-        starlette_lifespan = None
-    elif transport in ("http", "streamable-http"):
-        mcp_app = mcp.streamable_http_app()
+    mcp_app = mcp.streamable_http_app()

-        @asynccontextmanager
-        async def starlette_lifespan(app: Starlette):
-            # Set OAuth context for OAuth login routes (ADR-004)
-            if oauth_enabled:
-                # Prepare OAuth config from setup_oauth_config closure variables
-                mcp_server_url = os.getenv(
-                    "NEXTCLOUD_MCP_SERVER_URL", "http://localhost:8000"
-                )
-                nextcloud_resource_uri = os.getenv(
-                    "NEXTCLOUD_RESOURCE_URI", nextcloud_host
-                )
-                discovery_url = os.getenv(
-                    "OIDC_DISCOVERY_URL",
-                    f"{nextcloud_host}/.well-known/openid-configuration",
-                )
-                scopes = os.getenv("NEXTCLOUD_OIDC_SCOPES", "")
+    @asynccontextmanager
+    async def starlette_lifespan(app: Starlette):
+        # Set OAuth context for OAuth login routes (ADR-004)
+        if oauth_enabled:
+            # Prepare OAuth config from setup_oauth_config closure variables
+            mcp_server_url = os.getenv(
+                "NEXTCLOUD_MCP_SERVER_URL", "http://localhost:8000"
+            )
+            nextcloud_resource_uri = os.getenv("NEXTCLOUD_RESOURCE_URI", nextcloud_host)
+            discovery_url = os.getenv(
+                "OIDC_DISCOVERY_URL",
+                f"{nextcloud_host}/.well-known/openid-configuration",
+            )
+            scopes = os.getenv("NEXTCLOUD_OIDC_SCOPES", "")

-                oauth_context_dict = {
-                    "storage": refresh_token_storage,
-                    "oauth_client": oauth_client,
-                    "token_verifier": token_verifier,  # For querying IdP userinfo endpoint
-                    "config": {
-                        "mcp_server_url": mcp_server_url,
-                        "discovery_url": discovery_url,
-                        "client_id": client_id,  # From setup_oauth_config (DCR or static)
-                        "client_secret": client_secret,  # From setup_oauth_config (DCR or static)
-                        "scopes": scopes,
-                        "nextcloud_host": nextcloud_host,
-                        "nextcloud_resource_uri": nextcloud_resource_uri,
-                        "oauth_provider": oauth_provider,
-                    },
-                }
-                app.state.oauth_context = oauth_context_dict
+            oauth_context_dict = {
+                "storage": refresh_token_storage,
+                "oauth_client": oauth_client,
+                "token_verifier": token_verifier,  # For querying IdP userinfo endpoint
+                "config": {
+                    "mcp_server_url": mcp_server_url,
+                    "discovery_url": discovery_url,
+                    "client_id": client_id,  # From setup_oauth_config (DCR or static)
+                    "client_secret": client_secret,  # From setup_oauth_config (DCR or static)
+                    "scopes": scopes,
+                    "nextcloud_host": nextcloud_host,
+                    "nextcloud_resource_uri": nextcloud_resource_uri,
+                    "oauth_provider": oauth_provider,
+                },
+            }
+            app.state.oauth_context = oauth_context_dict

-                # Also set oauth_context on browser_app for session authentication
-                # browser_app is in the same function scope (defined later in create_app)
-                # We need to find it in the mounted routes
-                for route in app.routes:
-                    if isinstance(route, Mount) and route.path == "/app":
-                        route.app.state.oauth_context = oauth_context_dict
-                        logger.info(
-                            "OAuth context shared with browser_app for session auth"
-                        )
-                        break
-
-                logger.info(
-                    f"OAuth context initialized for login routes (client_id={client_id[:16]}...)"
-                )
-            else:
-                # BasicAuth mode - share storage with browser_app for webhook management
-                from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
-
-                storage = RefreshTokenStorage.from_env()
-                await storage.initialize()
-
-                app.state.storage = storage
-
-                # Also share with browser_app for webhook routes
-                for route in app.routes:
-                    if isinstance(route, Mount) and route.path == "/app":
-                        route.app.state.storage = storage
-                        logger.info(
-                            "Storage shared with browser_app for webhook management"
-                        )
-                        break
-
-            # Start background vector sync tasks for BasicAuth mode (ADR-007)
-            # For streamable-http transport, FastMCP lifespan isn't automatically triggered
-            # so we manually start background tasks here if vector sync is enabled
-            import anyio as anyio_module
-
-            settings = get_settings()
-            if not oauth_enabled and settings.vector_sync_enabled:
-                logger.info("Starting background vector sync tasks for BasicAuth mode")
-
-                # Get username from environment
-                username = os.getenv("NEXTCLOUD_USERNAME")
-                if not username:
-                    raise ValueError(
-                        "NEXTCLOUD_USERNAME required for vector sync in BasicAuth mode"
+            # Also set oauth_context on browser_app for session authentication
+            # browser_app is in the same function scope (defined later in get_app)
+            # We need to find it in the mounted routes
+            for route in app.routes:
+                if isinstance(route, Mount) and route.path == "/app":
+                    route.app.state.oauth_context = oauth_context_dict
+                    logger.info(
+                        "OAuth context shared with browser_app for session auth"
                    )
+                    break

-                # Get Nextcloud client from MCP app context
-                # Create client since we're outside FastMCP lifespan
-                client = NextcloudClient.from_env()
+            logger.info(
+                f"OAuth context initialized for login routes (client_id={client_id[:16]}...)"
+            )
+        else:
+            # BasicAuth mode - share storage with browser_app for webhook management
+            from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

-                # Initialize Qdrant collection before starting background tasks
-                logger.info("Initializing Qdrant collection...")
-                from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+            storage = RefreshTokenStorage.from_env()
+            await storage.initialize()

-                try:
-                    await get_qdrant_client()  # Triggers collection creation if needed
-                    logger.info("Qdrant collection ready")
-                except Exception as e:
-                    logger.error(f"Failed to initialize Qdrant collection: {e}")
-                    raise RuntimeError(
-                        f"Cannot start vector sync - Qdrant initialization failed: {e}"
-                    ) from e
+            app.state.storage = storage

-                # Initialize shared state
-                send_stream, receive_stream = anyio_module.create_memory_object_stream(
-                    max_buffer_size=settings.vector_sync_queue_max_size
+            # Also share with browser_app for webhook routes
+            for route in app.routes:
+                if isinstance(route, Mount) and route.path == "/app":
+                    route.app.state.storage = storage
+                    logger.info(
+                        "Storage shared with browser_app for webhook management"
+                    )
+                    break
+
+        # Start background vector sync tasks for BasicAuth mode (ADR-007)
+        # Scanner runs at server-level (once), not per-session
+        import anyio as anyio_module
+
+        settings = get_settings()
+        if not oauth_enabled and settings.vector_sync_enabled:
+            logger.info("Starting background vector sync tasks for BasicAuth mode")
+
+            # Get username from environment
+            username = os.getenv("NEXTCLOUD_USERNAME")
+            if not username:
+                raise ValueError(
+                    "NEXTCLOUD_USERNAME required for vector sync in BasicAuth mode"
                )
-                shutdown_event = anyio_module.Event()
-                scanner_wake_event = anyio_module.Event()

-                # Store in app state for access from routes (ADR-007)
-                app.state.document_send_stream = send_stream
-                app.state.document_receive_stream = receive_stream
-                app.state.shutdown_event = shutdown_event
-                app.state.scanner_wake_event = scanner_wake_event
+            # Create client for vector sync (server-level, not per-session)
+            client = NextcloudClient.from_env()

-                # Also share with browser_app for /app route
-                for route in app.routes:
-                    if isinstance(route, Mount) and route.path == "/app":
-                        route.app.state.document_send_stream = send_stream
-                        route.app.state.document_receive_stream = receive_stream
-                        route.app.state.shutdown_event = shutdown_event
-                        route.app.state.scanner_wake_event = scanner_wake_event
-                        logger.info(
-                            "Vector sync state shared with browser_app for /app"
-                        )
-                        break
+            # Initialize Qdrant collection before starting background tasks
+            logger.info("Initializing Qdrant collection...")
+            from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

-                # Start background tasks using anyio TaskGroup
-                async with anyio_module.create_task_group() as tg:
-                    # Start scanner task
-                    tg.start_soon(
-                        scanner_task,
-                        send_stream,
+            try:
+                await get_qdrant_client()  # Triggers collection creation if needed
+                logger.info("Qdrant collection ready")
+            except Exception as e:
+                logger.error(f"Failed to initialize Qdrant collection: {e}")
+                raise RuntimeError(
+                    f"Cannot start vector sync - Qdrant initialization failed: {e}"
+                ) from e
+
+            # Initialize shared state
+            send_stream, receive_stream = anyio_module.create_memory_object_stream(
+                max_buffer_size=settings.vector_sync_queue_max_size
+            )
+            shutdown_event = anyio_module.Event()
+            scanner_wake_event = anyio_module.Event()
+
+            # Store in app state for access from routes (ADR-007)
+            app.state.document_send_stream = send_stream
+            app.state.document_receive_stream = receive_stream
+            app.state.shutdown_event = shutdown_event
+            app.state.scanner_wake_event = scanner_wake_event
+
+            # Also store in module singleton for FastMCP session lifespans
+            _vector_sync_state.document_send_stream = send_stream
+            _vector_sync_state.document_receive_stream = receive_stream
+            _vector_sync_state.shutdown_event = shutdown_event
+            _vector_sync_state.scanner_wake_event = scanner_wake_event
+            logger.info("Vector sync state stored in module singleton")
+
+            # Also share with browser_app for /app route
+            for route in app.routes:
+                if isinstance(route, Mount) and route.path == "/app":
+                    route.app.state.document_send_stream = send_stream
+                    route.app.state.document_receive_stream = receive_stream
+                    route.app.state.shutdown_event = shutdown_event
+                    route.app.state.scanner_wake_event = scanner_wake_event
+                    logger.info("Vector sync state shared with browser_app for /app")
+                    break
+
+            # Start background tasks using anyio TaskGroup
+            async with anyio_module.create_task_group() as tg:
+                # Start scanner task
+                await tg.start(
+                    scanner_task,
+                    send_stream,
+                    shutdown_event,
+                    scanner_wake_event,
+                    client,
+                    username,
+                )
+
+                # Start processor pool (each gets a cloned receive stream)
+                for i in range(settings.vector_sync_processor_workers):
+                    await tg.start(
+                        processor_task,
+                        i,
+                        receive_stream.clone(),
                        shutdown_event,
-                        scanner_wake_event,
                        client,
                        username,
                    )

-                    # Start processor pool (each gets a cloned receive stream)
-                    for i in range(settings.vector_sync_processor_workers):
-                        tg.start_soon(
-                            processor_task,
-                            i,
-                            receive_stream.clone(),
-                            shutdown_event,
-                            client,
-                            username,
-                        )
+                logger.info(
+                    f"Background sync tasks started: 1 scanner + "
+                    f"{settings.vector_sync_processor_workers} processors"
+                )

-                    logger.info(
-                        f"Background sync tasks started: 1 scanner + "
-                        f"{settings.vector_sync_processor_workers} processors"
-                    )
-
-                    # Run MCP session manager and yield
-                    async with AsyncExitStack() as stack:
-                        await stack.enter_async_context(mcp.session_manager.run())
-                        try:
-                            yield
-                        finally:
-                            # Shutdown signal
-                            logger.info("Shutting down background sync tasks")
-                            shutdown_event.set()
-                            await client.close()
-                            # TaskGroup automatically cancels all tasks on exit
-            else:
-                # No vector sync - just run MCP session manager
+                # Run MCP session manager and yield
                async with AsyncExitStack() as stack:
                    await stack.enter_async_context(mcp.session_manager.run())
-                    yield
+                    try:
+                        yield
+                    finally:
+                        # Shutdown signal
+                        logger.info("Shutting down background sync tasks")
+                        shutdown_event.set()
+                        await client.close()
+                        # Leaving the TaskGroup waits for tasks (no auto-cancel); they are expected to stop once shutdown_event is set
+        else:
+            # No vector sync - just run MCP session manager
+            async with AsyncExitStack() as stack:
+                await stack.enter_async_context(mcp.session_manager.run())
+                yield

    # Health check endpoints for Kubernetes probes
    def health_live(request):
@@ -1338,6 +1468,26 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
    )
    logger.info("Test webhook endpoint enabled: /webhooks/nextcloud")

+    # ADR-016: Add Smithery well-known config endpoint for container runtime discovery
+    if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
+
+        def smithery_mcp_config(request):
+            """Smithery MCP configuration endpoint (``request`` is not inspected).
+
+            Returns SMITHERY_CONFIG_SCHEMA, the JSON Schema for Smithery's configuration UI.
+            This endpoint is required for Smithery container runtime discovery.
+            """
+            return JSONResponse(SMITHERY_CONFIG_SCHEMA)
+
+        routes.append(
+            Route(
+                "/.well-known/mcp-config",
+                smithery_mcp_config,
+                methods=["GET"],
+            )
+        )
+        logger.info("Smithery config endpoint enabled: /.well-known/mcp-config")
+
    # Note: Metrics endpoint is NOT exposed on main HTTP port for security reasons.
    # Metrics are served on dedicated port via setup_metrics() (default: 9090)

@@ -1468,58 +1618,98 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
        )

    # Add user info routes (available in both BasicAuth and OAuth modes)
-    # These require session authentication, so we wrap them in a separate app
-    from nextcloud_mcp_server.auth.session_backend import SessionAuthBackend
-    from nextcloud_mcp_server.auth.userinfo_routes import (
-        revoke_session,
-        user_info_html,
-        vector_sync_status_fragment,
-    )
-    from nextcloud_mcp_server.auth.webhook_routes import (
-        disable_webhook_preset,
-        enable_webhook_preset,
-        webhook_management_pane,
-    )
-
-    # Create a separate Starlette app for browser routes that need session auth
-    # This prevents SessionAuthBackend from interfering with FastMCP's OAuth
-    browser_routes = [
-        Route("/", user_info_html, methods=["GET"]),  # /app → webapp (HTML UI)
-        Route(
-            "/revoke", revoke_session, methods=["POST"], name="revoke_session_endpoint"
-        ),  # /app/revoke → revoke_session
-        # Vector sync status fragment (htmx polling)
-        Route(
-            "/vector-sync/status",
+    # ADR-016: Skip /app admin UI in Smithery stateless mode (no vector sync, webhooks)
+    if deployment_mode != DeploymentMode.SMITHERY_STATELESS:
+        # These require session authentication, so we wrap them in a separate app
+        from nextcloud_mcp_server.auth.session_backend import SessionAuthBackend
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            revoke_session,
+            user_info_html,
            vector_sync_status_fragment,
-            methods=["GET"],
-        ),  # /app/vector-sync/status
-        # Webhook management routes (admin-only)
-        Route("/webhooks", webhook_management_pane, methods=["GET"]),  # /app/webhooks
-        Route(
-            "/webhooks/enable/{preset_id:str}", enable_webhook_preset, methods=["POST"]
-        ),
-        Route(
-            "/webhooks/disable/{preset_id:str}",
+        )
+        from nextcloud_mcp_server.auth.viz_routes import (
+            chunk_context_endpoint,
+            vector_visualization_html,
+            vector_visualization_search,
+        )
+        from nextcloud_mcp_server.auth.webhook_routes import (
            disable_webhook_preset,
-            methods=["DELETE"],
-        ),
-    ]
+            enable_webhook_preset,
+            webhook_management_pane,
+        )

-    browser_app = Starlette(routes=browser_routes)
-    browser_app.add_middleware(
-        AuthenticationMiddleware,
-        backend=SessionAuthBackend(oauth_enabled=oauth_enabled),
-    )
+        # Create a separate Starlette app for browser routes that need session auth
+        # This prevents SessionAuthBackend from interfering with FastMCP's OAuth
+        browser_routes = [
+            Route(
+                "/", user_info_html, methods=["GET"]
+            ),  # /app → user info with all tabs
+            Route(
+                "/revoke",
+                revoke_session,
+                methods=["POST"],
+                name="revoke_session_endpoint",
+            ),  # /app/revoke → revoke_session
+            # Vector sync status fragment (htmx polling)
+            Route(
+                "/vector-sync/status",
+                vector_sync_status_fragment,
+                methods=["GET"],
+            ),  # /app/vector-sync/status
+            # Vector visualization routes
+            Route(
+                "/vector-viz", vector_visualization_html, methods=["GET"]
+            ),  # /app/vector-viz
+            Route(
+                "/vector-viz/search",
+                vector_visualization_search,
+                methods=["GET"],
+            ),  # /app/vector-viz/search
+            Route(
+                "/chunk-context",
+                chunk_context_endpoint,
+                methods=["GET"],
+            ),  # /app/chunk-context
+            # Webhook management routes (admin-only)
+            Route(
+                "/webhooks", webhook_management_pane, methods=["GET"]
+            ),  # /app/webhooks
+            Route(
+                "/webhooks/enable/{preset_id:str}",
+                enable_webhook_preset,
+                methods=["POST"],
+            ),
+            Route(
+                "/webhooks/disable/{preset_id:str}",
+                disable_webhook_preset,
+                methods=["DELETE"],
+            ),
+        ]

-    # Add redirect from /app to /app/ (Starlette requires trailing slash for mounted apps)
-    routes.append(
-        Route("/app", lambda request: RedirectResponse("/app/", status_code=307))
-    )
+        # Add static files mount if directory exists
+        static_dir = os.path.join(os.path.dirname(__file__), "auth", "static")
+        if os.path.isdir(static_dir):
+            browser_routes.append(
+                Mount("/static", StaticFiles(directory=static_dir), name="static")
+            )
+            logger.info(f"Mounted static files from {static_dir}")

-    # Mount browser app at /app (webapp and admin routes)
-    routes.append(Mount("/app", app=browser_app))
-    logger.info("App routes with session auth: /app, /app/webhooks, /app/revoke")
+        browser_app = Starlette(routes=browser_routes)
+        browser_app.add_middleware(
+            AuthenticationMiddleware,  # type: ignore[invalid-argument-type]
+            backend=SessionAuthBackend(oauth_enabled=oauth_enabled),
+        )
+
+        # Add redirect from /app to /app/ (Starlette requires trailing slash for mounted apps)
+        routes.append(
+            Route("/app", lambda request: RedirectResponse("/app/", status_code=307))
+        )
+
+        # Mount browser app at /app (webapp and admin routes)
+        routes.append(Mount("/app", app=browser_app))
+        logger.info("App routes with session auth: /app, /app/webhooks, /app/revoke")
+    else:
+        logger.info("Admin UI (/app) disabled in Smithery stateless mode")

    # Mount FastMCP at root last (catch-all, handles OAuth via token_verifier)
    routes.append(Mount("/", app=mcp_app))
@@ -1598,7 +1788,7 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):

    # Add CORS middleware to allow browser-based clients like MCP Inspector
    app.add_middleware(
-        CORSMiddleware,
+        CORSMiddleware,  # type: ignore[invalid-argument-type]
        allow_origins=["*"],  # Allow all origins for development
        allow_credentials=True,
        allow_methods=["*"],
@@ -1608,7 +1798,7 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):

    # Add observability middleware (metrics + tracing)
    if settings.metrics_enabled or settings.otel_exporter_otlp_endpoint:
-        app.add_middleware(ObservabilityMiddleware)
+        app.add_middleware(ObservabilityMiddleware)  # type: ignore[invalid-argument-type]
        logger.info("Observability middleware enabled (metrics and/or tracing)")

    # Add exception handler for scope challenges (OAuth mode only)
@@ -1639,4 +1829,11 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):

        logger.info("WWW-Authenticate scope challenge handler enabled")

+    # ADR-016: Apply SmitheryConfigMiddleware in Smithery stateless mode
+    # This must be the outermost middleware to extract config from URL query parameters
+    # before any other middleware processes the request
+    if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
+        app = SmitheryConfigMiddleware(app)
+        logger.info("SmitheryConfigMiddleware enabled for query parameter config")
+
    return app
@@ -0,0 +1,219 @@
+.viz-layout { /* full-height vertical stack; scrolls vertically on overflow */
+    height: 100%;
+    min-height: 0;
+    overflow-y: auto;
+    display: flex;
+    flex-direction: column;
+    gap: 16px;
+}
+.viz-card { /* flat panel: square corners, no shadow */
+    padding: 16px;
+    border-radius: 0;
+    box-shadow: none;
+    background: var(--color-main-background);
+}
+.viz-controls-card { /* does not grow or shrink as a flex item; ruled off below */
+    padding-bottom: 16px;
+    border-bottom: 1px solid var(--color-border);
+    flex: none;
+}
+.viz-controls-grid { /* default: as many columns of at least 200px as fit */
+    align-items: end;
+    gap: 12px;
+    display: grid;
+    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+}
+@media (min-width: 768px) { /* from 768px up: fixed five-track layout */
+    .viz-controls-grid {
+        grid-template-columns: 2fr 1.5fr 1.5fr auto auto;
+    }
+}
+.viz-control-group { /* children stacked vertically, 4px apart */
+    gap: 4px;
+    display: flex;
+    flex-direction: column;
+}
+.viz-control-group label {
+    color: var(--color-main-text);
+    font-size: 13px;
+    font-weight: 500;
+}
+.viz-control-group select,
+.viz-control-group input[type="number"],
+.viz-control-group input[type="text"] { /* shared look for text, number and select fields */
+    color: var(--color-main-text);
+    background: var(--color-main-background);
+    font-size: 14px;
+    width: 100%;
+    padding: 7px 10px;
+    border-radius: var(--border-radius);
+    border: 1px solid var(--color-border-dark);
+}
+.viz-control-group select:focus,
+.viz-control-group input:focus { /* focus is shown through the border colour, not the outline */
+    border-color: var(--color-primary-element);
+    outline: none;
+}
+.viz-control-group input[type="range"] { /* sliders span the full column width */
+    width: 100%;
+}
+.viz-control-group select[multiple] { /* multi-selects are at least 100px tall */
+    min-height: 100px;
+}
+.viz-weight-display { /* right-aligned inline value, at least 40px wide */
+    display: inline-block;
+    min-width: 40px;
+    text-align: right;
+    color: var(--color-text-maxcontrast, #666); /* theme's muted text colour; #666 when the variable is unset */
+}
+.viz-btn { /* primary action button, coloured from the theme's primary element colour */
+    background: var(--color-primary-element);
+    color: var(--color-primary-element-text, white); /* text colour paired with the primary colour; white when unset */
+    border: none;
+    padding: 7px 16px;
+    border-radius: var(--border-radius);
+    cursor: pointer;
+    font-size: 14px;
+    font-weight: 500;
+    white-space: nowrap;
+}
+.viz-btn:hover {
+    background: var(--color-primary-element-hover, #0052a3); /* hover follows the themed primary colour; previous blue when unset */
+}
+.viz-btn-secondary { /* neutral button using hard-coded greys */
+    background: #6c757d;
+    color: white;
+    border: none;
+    padding: 7px 16px;
+    border-radius: var(--border-radius);
+    cursor: pointer;
+    font-size: 14px;
+    white-space: nowrap;
+}
+.viz-btn-secondary:hover { /* darker grey on hover */
+    background: #5a6268;
+}
+.viz-card-plot { /* 600px-tall flex column that does not grow or shrink as a flex item */
+    display: flex;
+    flex-direction: column;
+    flex: none;
+    height: 600px;
+    min-height: 500px;
+    /* Drop the side padding and use -16px side margins so the plot runs edge to edge */
+    margin-left: -16px;
+    margin-right: -16px;
+    padding-left: 0;
+    padding-right: 0;
+}
+#viz-plot-container { /* positioned box, so absolutely-placed children anchor to it */
+    position: relative;
+    overflow: visible;
+    width: 100%;
+    height: 100%;
+}
+#viz-plot { /* plot element fills its container */
+    height: 100%;
+    width: 100%;
+}
+.viz-loading { /* centred, padded placeholder text */
+    text-align: center;
+    padding: 40px;
+    color: var(--color-text-maxcontrast, #666); /* theme's muted text colour; #666 when the variable is unset */
+}
+.viz-loading-overlay { /* covers its positioned ancestor and centres its content */
+    position: absolute;
+    inset: 0;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: var(--color-main-background, white); /* same surface colour as .viz-card; white when unset */
+    color: var(--color-text-maxcontrast, #666);
+}
+.viz-no-results { /* centred, padded, italic placeholder text */
+    text-align: center;
+    padding: 40px;
+    color: var(--color-text-maxcontrast, #666);
+    font-style: italic;
+}
+.viz-advanced-section { /* bordered, tinted panel with rounded corners */
+    border: 1px solid var(--color-border);
+    border-radius: var(--border-radius);
+    background: var(--color-background-hover);
+    padding: 12px;
+    margin-top: 12px;
+}
+.viz-info-box { /* tinted callout with a 3px accent bar on the left edge */
+    color: var(--color-main-text);
+    font-size: 13px;
+    margin-bottom: 16px;
+    padding: 10px 12px;
+    border-left: 3px solid var(--color-primary-element);
+    background: var(--color-primary-element-light);
+}
+.chunk-toggle-btn { /* small grey button */
+    margin-top: 6px;
+    padding: 4px 10px;
+    font-size: 12px;
+    cursor: pointer;
+    border: none;
+    border-radius: 3px;
+    color: white;
+    background: #6c757d;
+}
+.chunk-toggle-btn:hover { /* darker grey on hover */
+    background: #5a6268;
+}
+.chunk-context { /* monospace panel: whitespace is preserved and long words may break */
+    margin-top: 8px;
+    padding: 12px;
+    border: 1px solid var(--color-border);
+    border-radius: var(--border-radius);
+    background: var(--color-background-hover);
+    font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace;
+    font-size: 13px;
+    line-height: 1.6;
+    white-space: pre-wrap;
+    word-wrap: break-word;
+}
+.chunk-text { /* muted text colour */
+    color: var(--color-text-maxcontrast);
+}
+.chunk-matched { /* yellow-tinted, bordered highlight */
+    font-weight: 500;
+    color: var(--color-main-text);
+    padding: 2px 4px;
+    border: 1px solid #ffc107;
+    border-radius: var(--border-radius);
+    background: #fff3cd;
+}
+.chunk-ellipsis { /* muted italic text */
+    font-style: italic;
+    color: var(--color-text-maxcontrast);
+}
+
+/* Styles for the PDF highlighted image */
+.chunk-image-container { /* white, bordered frame; content is clipped to the rounded corners */
+    background: #fff;
+    overflow: hidden;
+    border: 1px solid var(--color-border);
+    border-radius: var(--border-radius);
+    margin-bottom: 16px;
+}
+.chunk-image-header { /* small caption bar separated by a bottom rule */
+    font-family: var(--font-face);
+    font-size: 12px;
+    font-weight: 500;
+    color: var(--color-text-maxcontrast);
+    background: var(--color-background-dark);
+    padding: 8px 12px;
+    border-bottom: 1px solid var(--color-border);
+}
+.chunk-highlighted-image { /* never wider than its container; height follows the aspect ratio */
+    cursor: zoom-in;
+    display: block;
+    height: auto;
+    max-width: 100%;
+}
+.chunk-highlighted-image:hover { /* slight fade on hover */
+    opacity: 0.95;
+}
@@ -0,0 +1,253 @@
+// Initialize vizApp for vector visualization
+function vizApp() {
+    return {
+        query: '',
+        algorithm: 'bm25_hybrid',
+        fusion: 'rrf',
+        showAdvanced: false,
+        showQueryPoint: true,
+        docTypes: [''],
+        limit: 50,
+        scoreThreshold: 0.0,
+        loading: false,
+        results: [],
+        coordinates: null,
+        queryCoords: null,
+        expandedChunks: {},
+        chunkLoading: {},
+
+        init() {
+            // Set up window resize listener to resize plot
+            window.addEventListener('resize', () => {
+                if (this.coordinates && this.results.length > 0) {
+                    Plotly.Plots.resize('viz-plot');
+                }
+            });
+        },
+
+        async executeSearch() {
+            this.loading = true;
+            this.results = [];
+
+            try {
+                const params = new URLSearchParams({
+                    query: this.query,
+                    algorithm: this.algorithm,
+                    limit: this.limit,
+                    score_threshold: this.scoreThreshold,
+                });
+
+                if (this.algorithm === 'bm25_hybrid') {
+                    params.append('fusion', this.fusion);
+                }
+
+                const selectedTypes = this.docTypes.filter(t => t !== '');
+                if (selectedTypes.length > 0) {
+                    params.append('doc_types', selectedTypes.join(','));
+                }
+
+                const response = await fetch(`/app/vector-viz/search?${params}`);
+                const data = await response.json();
+
+                if (data.success) {
+                    this.results = data.results;
+                    this.coordinates = data.coordinates_3d;
+                    this.queryCoords = data.query_coords;
+                    this.renderPlot(this.coordinates, this.queryCoords, this.results);
+                } else {
+                    alert('Search failed: ' + data.error);
+                }
+            } catch (error) {
+                alert('Error: ' + error.message);
+            } finally {
+                this.loading = false;
+            }
+        },
+
+        updatePlot() {
+            // Toggle query point visibility without recreating the plot
+            // This preserves camera position naturally since layout is untouched
+            if (this.coordinates && this.queryCoords && this.results.length > 0) {
+                const plotDiv = document.getElementById('viz-plot');
+
+                // If plot exists, just toggle the query trace visibility
+                if (plotDiv && plotDiv.data && plotDiv.data.length >= 2) {
+                    // Trace index 1 is the query point
+                    Plotly.restyle('viz-plot', { visible: this.showQueryPoint }, [1]);
+                } else {
+                    // Plot doesn't exist yet, render it
+                    this.renderPlot(this.coordinates, this.queryCoords, this.results);
+                }
+            }
+        },
+
+        renderPlot(coordinates, queryCoords, results) {
+            // Get container dimensions before creating layout
+            const container = document.getElementById('viz-plot-container');
+            const width = container.clientWidth;
+            const height = container.clientHeight;
+
+            const scores = results.map(r => r.score);
+
+            // Trace 1: Document results (always visible)
+            const documentTrace = {
+                x: coordinates.map(c => c[0]),
+                y: coordinates.map(c => c[1]),
+                z: coordinates.map(c => c[2]),
+                mode: 'markers',
+                type: 'scatter3d',
+                name: 'Documents',
+                visible: true,
+                customdata: results.map((r, i) => ({
+                    title: r.title,
+                    raw_score: r.original_score,
+                    relative_score: r.score,
+                    x: coordinates[i][0],
+                    y: coordinates[i][1],
+                    z: coordinates[i][2]
+                })),
+                hovertemplate:
+                    '<b>%{customdata.title}</b><br>' +
+                    'Raw Score: %{customdata.raw_score:.3f} (%{customdata.relative_score:.0%} relative)<br>' +
+                    '(x=%{customdata.x}, y=%{customdata.y}, z=%{customdata.z})' +
+                    '<extra></extra>',
+                marker: {
+                    size: results.map(r => 4 + (Math.pow(r.score, 2) * 10)),
+                    opacity: results.map(r => 0.3 + (r.score * 0.7)),
+                    color: scores,
+                    colorscale: 'Viridis',
+                    showscale: true,
+                    colorbar: {
+                        title: 'Relative Score',
+                        x: 1.02,
+                        xanchor: 'left',
+                        thickness: 20,
+                        len: 0.8
+                    },
+                    cmin: 0,
+                    cmax: 1
+                }
+            };
+
+            // Trace 2: Query point (visibility controlled by toggle)
+            const queryTrace = {
+                x: [queryCoords[0]],
+                y: [queryCoords[1]],
+                z: [queryCoords[2]],
+                mode: 'markers',
+                type: 'scatter3d',
+                name: 'Query',
+                visible: this.showQueryPoint,  // Initial visibility from state
+                hovertemplate:
+                    '<b>Search Query</b><br>' +
+                    `(x=${queryCoords[0]}, y=${queryCoords[1]}, z=${queryCoords[2]})` +
+                    '<extra></extra>',
+                marker: {
+                    size: 10,
+                    color: '#ef5350',  // Subdued red (Material Design Red 400)
+                    line: {
+                        color: '#c62828',  // Darker red border (Material Design Red 800)
+                        width: 1
+                    }
+                }
+            };
+
+            const layout = {
+                title: `Vector Space (PCA 3D) - ${results.length} results`,
+                width: width,   // Explicit width from container
+                height: height, // Explicit height from container
+                scene: {
+                    xaxis: { title: 'PC1' },
+                    yaxis: { title: 'PC2' },
+                    zaxis: { title: 'PC3' },
+                    camera: {
+                        eye: { x: 1.5, y: 1.5, z: 1.5 }
+                    },
+                    // Full width for 3D scene
+                    domain: {
+                        x: [0, 1],
+                        y: [0, 1]
+                    }
+                },
+                hovermode: 'closest',
+                autosize: true,  // Enable auto-sizing for window resizes
+                showlegend: false,  // Hide legend
+                margin: { l: 0, r: 100, t: 40, b: 0 }  // Right margin for colorbar
+            };
+
+            // Always render both traces - visibility is controlled by the visible property
+            const traces = [documentTrace, queryTrace];
+
+            // Enable responsive resizing
+            const config = {
+                responsive: true,
+                displayModeBar: true
+            };
+
+            // Use newPlot() with explicit dimensions - renders at correct size immediately
+            // Camera position will be preserved by subsequent Plotly.restyle() calls in updatePlot()
+            Plotly.newPlot('viz-plot', traces, layout, config);
+        },
+
+        getNextcloudUrl(result) {
+            // Use global NEXTCLOUD_BASE_URL if set, otherwise construct from window location
+            const baseUrl = window.NEXTCLOUD_BASE_URL || '';
+            switch (result.doc_type) {
+                case 'note':
+                    return `${baseUrl}/apps/notes/note/${result.id}`;
+                case 'file':
+                    return `${baseUrl}/apps/files/?fileId=${result.id}`;
+                case 'calendar':
+                    return `${baseUrl}/apps/calendar`;
+                case 'contact':
+                    return `${baseUrl}/apps/contacts`;
+                case 'deck':
+                    return `${baseUrl}/apps/deck`;
+                default:
+                    return `${baseUrl}`;
+            }
+        },
+
+        hasChunkPosition(result) {
+            return result.chunk_start_offset != null && result.chunk_end_offset != null;
+        },
+
+        isChunkExpanded(resultKey) {
+            return this.expandedChunks[resultKey] !== undefined;
+        },
+
+        async toggleChunk(result) {
+            const resultKey = `${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`;
+
+            if (this.isChunkExpanded(resultKey)) {
+                delete this.expandedChunks[resultKey];
+                return;
+            }
+
+            this.chunkLoading[resultKey] = true;
+
+            try {
+                const params = new URLSearchParams({
+                    doc_type: result.doc_type,
+                    doc_id: result.id,
+                    start: result.chunk_start_offset,
+                    end: result.chunk_end_offset,
+                    context: 500
+                });
+
+                const response = await fetch(`/app/chunk-context?${params}`);
+                const data = await response.json();
+
+                if (data.success) {
+                    this.expandedChunks[resultKey] = data;
+                } else {
+                    alert('Failed to load chunk: ' + data.error);
+                }
+            } catch (error) {
+                alert('Error loading chunk: ' + error.message);
+            } finally {
+                delete this.chunkLoading[resultKey];
+            }
+        }
+    };
+}
@@ -1310,7 +1310,7 @@ async def generate_encryption_key() -> str:

 # Example usage
 if __name__ == "__main__":
-    import asyncio
+    import anyio

    async def main():
        # Generate a key for testing
@@ -1318,4 +1318,4 @@ if __name__ == "__main__":
        print(f"Generated encryption key: {key}")
        print(f"Set this in your environment: export TOKEN_ENCRYPTION_KEY='{key}'")

-    asyncio.run(main())
+    anyio.run(main)
@@ -0,0 +1,524 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <!-- No maximum-scale: pinch-zoom stays available to users who need to enlarge the page -->
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta name="apple-mobile-web-app-capable" content="yes">
+    <meta name="theme-color" content="#0082c9">
+    <title>{% block title %}Nextcloud MCP Server{% endblock %}</title>
+
+    <!-- Favicon -->
+    <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' width='32' height='32' viewBox='0 0 512 512'><rect width='512' height='512' rx='80' ry='80' fill='%230082C9'/><path d='M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 
8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z' fill='%23fff'/></svg>">
+
+    <!-- Open Sans font -->
+    <style>
+        @font-face {
+            font-family: 'Open Sans';
+            font-style: normal;
+            font-weight: normal;
+            src: local('Open Sans'), local('OpenSans');
+        }
+        @font-face {
+            font-family: 'Open Sans';
+            font-style: normal;
+            font-weight: bold;
+            src: local('Open Sans Semibold'), local('OpenSans-Semibold');
+        }
+    </style>
+
+    {% block extra_head %}{% endblock %}
+
+    <style>
+        /* Nextcloud App Design System */
+
+        /* CSS Variables */
+        :root {
+            /* Primary Colors */
+            --color-primary: #00679e;
+            --color-primary-element: #00679e;
+            --color-primary-light: #e5eff5;
+            --color-primary-element-light: #e5eff5;
+
+            /* Background Colors */
+            --color-main-background: #ffffff;
+            --color-background-dark: #ededed;
+            --color-background-hover: #f5f5f5;
+
+            /* Text Colors */
+            --color-main-text: #222222;
+            --color-text-maxcontrast: #6b6b6b;
+            --color-text-light: #767676;
+
+            /* Border Colors */
+            --color-border: #ededed;
+            --color-border-dark: #dbdbdb;
+
+            /* Borders & Radius */
+            --border-radius: 3px;
+            --border-radius-large: 10px;
+            --border-radius-pill: 100px;
+
+            /* Spacing */
+            --default-grid-baseline: 4px;
+            --default-clickable-area: 44px;
+        }
+
+        /* SVG Icon Styles */
+        .nav-icon {
+            width: 20px;
+            height: 20px;
+            display: inline-block;
+            fill: var(--color-main-text);
+            opacity: 0.7;
+        }
+
+        .app-navigation-entry.active .nav-icon {
+            fill: var(--color-primary-element);
+            opacity: 1;
+        }
+
+        /* General */
+        * {
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
+            color: var(--color-main-text);
+            background: var(--color-main-background);
+            margin: 0;
+            padding: 0;
+        }
+
+        h1, h2, h3 {
+            font-weight: 300;
+            line-height: 1.2;
+        }
+
+        h1 {
+            font-size: 32px;
+            margin: 0 0 20px 0;
+            color: var(--color-main-text);
+        }
+
+        h2 {
+            font-size: 20px;
+            margin: 20px 0 12px 0;
+            color: var(--color-main-text);
+            border-bottom: 1px solid var(--color-border);
+            padding-bottom: 8px;
+        }
+
+        h3 {
+            font-size: 16px;
+            margin: 16px 0 8px 0;
+            color: var(--color-main-text);
+            font-weight: 500;
+        }
+
+        img {
+            max-width: 100%;
+        }
+
+        /* App Header (simplified, no full menu) */
+        .app-header {
+            height: 50px;
+            background: var(--color-primary-element);
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            position: sticky;
+            top: 0;
+            z-index: 100;
+            display: flex;
+            align-items: center;
+            padding: 0 20px;
+        }
+
+        .app-header__brand {
+            color: white;
+            font-size: 18px;
+            font-weight: 600;
+            text-decoration: none;
+            display: flex;
+            align-items: center;
+            gap: 12px;
+        }
+
+        .app-header__brand:hover {
+            opacity: 0.9;
+        }
+
+        .app-header__logo {
+            height: 32px;
+            width: 32px;
+            fill: white;
+        }
+
+        /* App Layout */
+        .app-content-wrapper {
+            display: flex;
+            height: calc(100vh - 50px);
+            overflow: hidden;
+        }
+
+        /* Side Navigation */
+        #app-navigation {
+            width: 250px;
+            background: var(--color-main-background);
+            border-right: 1px solid var(--color-border);
+            display: flex;
+            flex-direction: column;
+            flex-shrink: 0;
+            transition: margin-left 0.3s ease;
+        }
+
+        #app-navigation.app-navigation--closed {
+            margin-left: -250px;
+        }
+
+        .app-navigation__content {
+            flex: 1;
+            overflow-y: auto;
+            padding: 8px;
+            display: flex;
+            flex-direction: column;
+        }
+
+        .app-navigation-list {
+            list-style: none;
+            padding: 0;
+            margin: 0;
+            flex: 1;
+        }
+
+        .app-navigation-entry {
+            position: relative;
+            margin-bottom: 2px;
+        }
+
+        .app-navigation-entry__wrapper {
+            display: flex;
+            align-items: center;
+            position: relative;
+        }
+
+        .app-navigation-entry-link {
+            display: flex;
+            align-items: center;
+            padding: 0 8px;
+            min-height: var(--default-clickable-area);
+            border-radius: var(--border-radius);
+            transition: background-color 100ms ease-in-out;
+            text-decoration: none;
+            color: var(--color-main-text);
+            flex: 1;
+            font-size: 14px;
+        }
+
+        .app-navigation-entry-link:hover {
+            background-color: var(--color-background-hover);
+        }
+
+        .app-navigation-entry.active .app-navigation-entry-link {
+            background-color: var(--color-primary-element-light);
+            font-weight: 500;
+        }
+
+        .app-navigation-entry-icon {
+            width: var(--default-clickable-area);
+            height: var(--default-clickable-area);
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            margin-right: 0;
+        }
+
+        .app-navigation-entry__name {
+            flex: 1;
+            white-space: nowrap;
+            overflow: hidden;
+            text-overflow: ellipsis;
+        }
+
+        .app-navigation-entry__counter {
+            margin-left: auto;
+            padding: 2px 6px;
+            border-radius: var(--border-radius-pill);
+            background-color: var(--color-background-dark);
+            font-size: 11px;
+            color: var(--color-text-maxcontrast);
+            min-width: 20px;
+            text-align: center;
+        }
+
+        .app-navigation__settings {
+            list-style: none;
+            padding: 8px 0 0 0;
+            margin: 8px 0 0 0;
+            border-top: 1px solid var(--color-border);
+            flex-shrink: 0;
+        }
+
+        .app-navigation-toggle {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            position: fixed;
+            top: 60px;
+            left: 10px;
+            z-index: 110;
+            background: var(--color-main-background);
+            border: 1px solid var(--color-border);
+            border-radius: var(--border-radius);
+            padding: 8px 12px;
+            cursor: pointer;
+            box-shadow: 0 0 5px rgba(0,0,0,0.1);
+            transition: left 0.3s ease;
+        }
+
+        .app-navigation-toggle:hover {
+            background: var(--color-background-hover);
+        }
+
+        #app-navigation:not(.app-navigation--closed) ~ * .app-navigation-toggle {
+            left: 260px;
+        }
+
+        /* Main Content Area */
+        #app-content {
+            flex: 1;
+            overflow-y: auto;
+            background: var(--color-main-background);
+        }
+
+        .page-content {
+            max-width: 1000px;
+            margin: 0 auto;
+            padding: 24px;
+        }
+
+        .content-section {
+            background: var(--color-main-background);
+            border-radius: 0;
+            padding: 0;
+            box-shadow: none;
+        }
+
+        .content-section h1 {
+            font-size: 24px;
+            font-weight: 600;
+            margin-bottom: 24px;
+        }
+
+        .content-section h2 {
+            font-size: 18px;
+            font-weight: 500;
+            margin: 24px 0 12px 0;
+            border-bottom: none;
+            padding-bottom: 0;
+        }
+
+        .content-section h3 {
+            font-size: 16px;
+            font-weight: 500;
+        }
+
+        /* Responsive */
+        @media (max-width: 768px) {
+            #app-navigation {
+                position: fixed;
+                height: calc(100vh - 50px);
+                z-index: 105;
+                box-shadow: 2px 0 8px rgba(0,0,0,0.1);
+            }
+
+            .page-content {
+                padding: 16px;
+            }
+        }
+
+        /* Footer */
+        footer.page-footer {
+            background-color: #0F0833;
+            color: #ffffff;
+            padding: 40px 0;
+            margin-top: 60px;
+        }
+
+        footer.page-footer .bootstrap-container {
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 0 20px;
+        }
+
+        footer.page-footer h1 {
+            font-size: 15px;
+            font-weight: bold;
+            line-height: 1.8;
+            color: #ffffff;
+            margin-top: 20px;
+        }
+
+        footer.page-footer ul {
+            list-style-type: none;
+            padding-left: 0;
+        }
+
+        footer.page-footer li {
+            font-size: 13px;
+            line-height: 1.8;
+            color: #ffffff;
+            margin-top: 0;
+        }
+
+        footer.page-footer li a {
+            color: #ffffff;
+            text-decoration: none;
+            display: block;
+            padding: 4px 0;
+        }
+
+        footer.page-footer li a:hover {
+            text-decoration: underline;
+        }
+
+        footer.page-footer p {
+            font-size: 15px;
+            line-height: 1.8;
+            color: #ffffff;
+        }
+
+        footer.page-footer p.copyright {
+            color: rgba(255, 255, 255, 0.5);
+            font-size: 13px;
+            text-align: center;
+            margin-top: 30px;
+        }
+
+        /* Buttons */
+        .btn {
+            border-radius: 50px;
+            padding: 10px 20px;
+            text-decoration: none;
+            display: inline-block;
+            cursor: pointer;
+            border: none;
+            font-size: 14px;
+            transition: all 0.3s;
+        }
+
+        .btn-primary {
+            background: #0082C9;
+            border: 1px solid #0062C9;
+            color: #fff;
+        }
+
+        .btn-primary:hover {
+            background: #006ba3;
+        }
+
+        /* Tables */
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            margin: 20px 0;
+        }
+
+        td {
+            padding: 12px 8px;
+            border-bottom: 1px solid var(--color-border);
+            font-size: 14px;
+        }
+
+        td:first-child {
+            width: 180px;
+            color: var(--color-text-maxcontrast);
+            font-weight: 500;
+        }
+
+        code {
+            background-color: var(--color-background-dark);
+            padding: 2px 6px;
+            border-radius: var(--border-radius);
+            font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace;
+            font-size: 90%;
+            color: var(--color-main-text);
+        }
+
+        /* Badges */
+        .badge {
+            display: inline-block;
+            padding: 3px 8px;
+            border-radius: 12px;
+            font-size: 12px;
+            font-weight: bold;
+            text-transform: uppercase;
+        }
+
+        .badge-oauth {
+            background-color: #4caf50;
+            color: white;
+        }
+
+        .badge-basic {
+            background-color: #2196f3;
+            color: white;
+        }
+
+        /* Messages */
+        .warning {
+            background-color: #fff3cd;
+            border-left: 4px solid #ffc107;
+            padding: 15px;
+            margin: 15px 0;
+            color: #856404;
+        }
+
+        .info-message {
+            background-color: #e3f2fd;
+            border-left: 4px solid #2196f3;
+            padding: 15px;
+            margin: 15px 0;
+            color: #1565c0;
+        }
+
+        .error {
+            background-color: #ffebee;
+            border-left: 4px solid #d32f2f;
+            padding: 15px;
+            margin: 15px 0;
+            color: #c62828;
+        }
+
+        .success {
+            background-color: #e8f5e9;
+            border: 2px solid #4caf50;
+            padding: 30px;
+            border-radius: 8px;
+            text-align: center;
+        }
+
+        .success h1 {
+            color: #4caf50;
+        }
+
+        {% block extra_styles %}{% endblock %}
+    </style>
+</head>
+<body>
+    <!-- App Header -->
+    <header class="app-header">
+        <a href="/app" class="app-header__brand">
+            <svg class="app-header__logo" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512">
+                <path d="M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 
4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z" fill="#fff"/>
+            </svg>
+            <span>Nextcloud MCP Server</span>
+        </a>
+    </header>
+
+    <!-- App Content Wrapper (Sidebar + Main Content) -->
+    {% block content %}{% endblock %}
+
+    {% block scripts %}{% endblock %}
+</body>
+</html>
@@ -0,0 +1,19 @@
+{% extends "base.html" %}
+
+{% block title %}{{ error_title|default('Error') }} - Nextcloud MCP Server{% endblock %}
+
+{% block content %}
+<h1>{{ error_title|default('Error') }}</h1>
+
+<div class="error">
+    <strong>Error:</strong> {{ error_message }}
+</div>
+
+{% if login_url %}
+<p><a href="{{ login_url }}" class="btn btn-primary">Login again</a></p>
+{% endif %}
+
+{% if back_url %}
+<p><a href="{{ back_url }}" class="btn">Go Back</a></p>
+{% endif %}
+{% endblock %}
@@ -0,0 +1,21 @@
+{% extends "base.html" %}
+
+{% block title %}{{ success_title|default('Success') }} - Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+{% if redirect_url %}{# A missing or zero delay means "redirect immediately", so the "Redirecting..." notice in the content block is never shown without a refresh. #}
+<meta http-equiv="refresh" content="{{ redirect_delay|default(0, true) }};url={{ redirect_url }}">
+{% endif %}
+{% endblock %}
+
+{% block content %}
+<div class="success">
+    <h1>{{ success_title|default('✓ Success') }}</h1>
+    {% for message in success_messages %}
+    <p>{{ message }}</p>
+    {% endfor %}
+    {% if redirect_url %}
+    <p>Redirecting...</p>
+    {% endif %}
+</div>
+{% endblock %}
@@ -0,0 +1,650 @@
+{% extends "base.html" %}
+
+{% block title %}Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+    <!-- htmx for dynamic loading -->
+    <script src="https://unpkg.com/htmx.org@1.9.10"></script>
+
+    <!-- Alpine.js for state management (pinned to an exact release, like the other CDN assets, so page loads are reproducible) -->
+    <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.14.9/dist/cdn.min.js"></script>
+
+    <!-- Plotly.js for vector visualization -->
+    <script src="https://cdn.plot.ly/plotly-3.3.0.min.js"></script>
+
+    <!-- Vector Viz static assets -->
+    <link rel="stylesheet" href="/app/static/vector-viz.css">
+{% endblock %}
+
+{% block extra_styles %}
+    /* Smooth htmx transitions */
+    .htmx-swapping {
+        opacity: 0;
+        transition: opacity 200ms ease-out;
+    }
+
+    .htmx-settling {
+        opacity: 1;
+        transition: opacity 200ms ease-in;
+    }
+
+    /* Logout button styling */
+    .logout-section {
+        margin-top: 20px;
+        padding-top: 20px;
+        border-top: 1px solid var(--color-border);
+    }
+
+    /* Welcome tab specific styles */
+    .hero-section {
+        background: linear-gradient(135deg, var(--color-primary-element) 0%, #0082c9 100%);
+        color: white;
+        padding: 60px 24px;
+        margin: -24px -24px 40px -24px;
+        border-radius: 0 0 var(--border-radius-large) var(--border-radius-large);
+        text-align: center;
+    }
+
+    .hero-section h1 {
+        color: white;
+        font-size: 36px;
+        margin: 0 0 16px 0;
+        font-weight: 600;
+    }
+
+    .hero-section p {
+        font-size: 18px;
+        opacity: 0.95;
+        max-width: 700px;
+        margin: 0 auto;
+        line-height: 1.6;
+    }
+
+    .feature-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+        gap: 24px;
+        margin: 32px 0;
+    }
+
+    .feature-card {
+        background: var(--color-main-background);
+        border: 2px solid var(--color-border);
+        border-radius: var(--border-radius-large);
+        padding: 24px;
+        transition: all 0.2s;
+        cursor: pointer;
+        text-decoration: none;
+        color: inherit;
+        display: block;
+    }
+
+    .feature-card:hover {
+        border-color: var(--color-primary-element);
+        box-shadow: 0 4px 12px rgba(0, 103, 158, 0.15);
+        transform: translateY(-2px);
+    }
+
+    .feature-card h3 {
+        color: var(--color-primary-element);
+        font-size: 20px;
+        margin: 12px 0 8px 0;
+        font-weight: 600;
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .feature-card p {
+        color: var(--color-text-maxcontrast);
+        font-size: 14px;
+        line-height: 1.6;
+        margin: 8px 0 0 0;
+    }
+
+    .feature-icon {
+        width: 48px;
+        height: 48px;
+        background: var(--color-primary-element-light);
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        margin-bottom: 8px;
+    }
+
+    .feature-icon svg {
+        width: 28px;
+        height: 28px;
+        fill: var(--color-primary-element);
+    }
+
+    .info-section {
+        background: var(--color-background-hover);
+        border-radius: var(--border-radius-large);
+        padding: 32px;
+        margin: 32px 0;
+    }
+
+    .info-section h2 {
+        color: var(--color-main-text);
+        font-size: 24px;
+        margin: 0 0 16px 0;
+        border: none;
+        padding: 0;
+    }
+
+    .info-section p {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 12px 0;
+    }
+
+    .info-section ul {
+        margin: 12px 0;
+        padding-left: 24px;
+    }
+
+    .info-section li {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 8px 0;
+    }
+
+    .info-section code {
+        background: var(--color-main-background);
+        padding: 2px 8px;
+        border-radius: var(--border-radius);
+        font-size: 13px;
+    }
+
+    .auth-status {
+        background: var(--color-primary-element-light);
+        border-left: 4px solid var(--color-primary-element);
+        padding: 16px 20px;
+        margin: 24px 0;
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .auth-status svg {
+        width: 24px;
+        height: 24px;
+        fill: var(--color-primary-element);
+        flex-shrink: 0;
+    }
+
+    .auth-status-text {
+        flex: 1;
+    }
+
+    .auth-status-text strong {
+        display: block;
+        color: var(--color-main-text);
+        font-size: 14px;
+        margin-bottom: 4px;
+    }
+
+    .auth-status-text span {
+        color: var(--color-text-maxcontrast);
+        font-size: 13px;
+    }
+{% endblock %}
+
+{% block content %}
+<div class="app-content-wrapper" x-data="{ activeSection: 'welcome', navOpen: true }">
+    <!-- Side Navigation -->
+    <nav id="app-navigation" :class="{ 'app-navigation--closed': !navOpen }">
+        <div class="app-navigation__content">
+            <!-- Navigation List -->
+            <ul class="app-navigation-list">
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'welcome' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'welcome'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M10,20V14H14V20H19V12H22L12,3L2,12H5V20H10Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Welcome</span>
+                        </a>
+                    </div>
+                </li>
+
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'user-info' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'user-info'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">User Info</span>
+                        </a>
+                    </div>
+                </li>
+
+                {% if show_vector_sync_tab %}
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'vector-sync' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'vector-sync'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Vector Sync</span>
+                        </a>
+                    </div>
+                </li>
+
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'vector-viz' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'vector-viz'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Vector Viz</span>
+                        </a>
+                    </div>
+                </li>
+                {% endif %}
+
+                {% if show_webhooks_tab %}
+                <li class="app-navigation-entry" :class="{ 'active': activeSection === 'webhooks' }">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="#"
+                           @click.prevent="activeSection = 'webhooks'"
+                           class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M10.59,13.41C11,13.8 11,14.44 10.59,14.83C10.2,15.22 9.56,15.22 9.17,14.83C7.22,12.88 7.22,9.71 9.17,7.76V7.76L12.71,4.22C14.66,2.27 17.83,2.27 19.78,4.22C21.73,6.17 21.73,9.34 19.78,11.29L18.29,12.78C18.3,11.96 18.17,11.14 17.89,10.36L18.36,9.88C19.54,8.71 19.54,6.81 18.36,5.64C17.19,4.46 15.29,4.46 14.12,5.64L10.59,9.17C9.41,10.34 9.41,12.24 10.59,13.41M13.41,9.17C13.8,8.78 14.44,8.78 14.83,9.17C16.78,11.12 16.78,14.29 14.83,16.24V16.24L11.29,19.78C9.34,21.73 6.17,21.73 4.22,19.78C2.27,17.83 2.27,14.66 4.22,12.71L5.71,11.22C5.7,12.04 5.83,12.86 6.11,13.65L5.64,14.12C4.46,15.29 4.46,17.19 5.64,18.36C6.81,19.54 8.71,19.54 9.88,18.36L13.41,14.83C14.59,13.66 14.59,11.76 13.41,10.59C13,10.2 13,9.56 13.41,9.17Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Webhooks</span>
+                        </a>
+                    </div>
+                </li>
+                {% endif %}
+            </ul>
+
+            <!-- Settings/Logout at bottom -->
+            {% if logout_url %}
+            <ul class="app-navigation__settings">
+                <li class="app-navigation-entry">
+                    <div class="app-navigation-entry__wrapper">
+                        <a href="{{ logout_url }}" class="app-navigation-entry-link">
+                            <span class="app-navigation-entry-icon">
+                                <svg class="nav-icon" viewBox="0 0 24 24">
+                                    <path d="M16,17V14H9V10H16V7L21,12L16,17M14,2A2,2 0 0,1 16,4V6H14V4H5V20H14V18H16V20A2,2 0 0,1 14,22H5A2,2 0 0,1 3,20V4A2,2 0 0,1 5,2H14Z" />
+                                </svg>
+                            </span>
+                            <span class="app-navigation-entry__name">Logout</span>
+                        </a>
+                    </div>
+                </li>
+            </ul>
+            {% endif %}
+        </div>
+
+        <!-- Toggle Button (mobile); aria-label gives the glyph-only button an accessible name -->
+        <button type="button" @click="navOpen = !navOpen"
+                class="app-navigation-toggle" aria-label="Toggle navigation"
+                :aria-expanded="navOpen.toString()">
+            ☰
+        </button>
+    </nav>
+
+    <!-- Main Content Area -->
+    <main id="app-content">
+        <div class="page-content">
+            <!-- Welcome Section -->
+            <div x-show="activeSection === 'welcome'">
+                <!-- Hero Section -->
+                <div class="hero-section">
+                    <h1>Welcome to Nextcloud MCP Server</h1>
+                    <p>
+                        Interactive user interface for semantic search and document retrieval.
+                        Test queries, visualize results, and explore your Nextcloud content using RAG workflows.
+                    </p>
+                </div>
+
+                <!-- Authentication Status -->
+                <div class="auth-status">
+                    <svg viewBox="0 0 24 24">
+                        <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                    </svg>
+                    <div class="auth-status-text">
+                        <strong>Authenticated as: {{ username }}</strong>
+                        <span>Authentication mode: <code>{{ auth_mode }}</code></span>
+                    </div>
+                </div>
+
+                {% if vector_sync_enabled %}
+                <!-- Vector Sync Enabled Content -->
+                <div class="info-section">
+                    <h2>About Semantic Search</h2>
+                    <p>
+                        This interface provides access to <strong>semantic search</strong> capabilities powered by vector embeddings.
+                        Unlike traditional keyword search, semantic search understands the <em>meaning</em> of your queries and finds
+                        conceptually similar content across your Nextcloud apps.
+                    </p>
+                    <p>
+                        <strong>How it works:</strong>
+                    </p>
+                    <ul>
+                        <li>Documents from Notes, Calendar, Files, Contacts, and Deck are indexed into a vector database</li>
+                        <li>Each document chunk is converted to a high-dimensional vector embedding (for example 768 dimensions, depending on the configured embedding model) that captures semantic meaning</li>
+                        <li>Queries are also converted to embeddings and matched against document vectors using similarity search</li>
+                        <li>Results can be retrieved using pure semantic search or hybrid BM25 search combining keywords and semantics</li>
+                    </ul>
+                </div>
+
+                <div class="info-section">
+                    <h2>RAG Workflow Integration</h2>
+                    <p>
+                        This UI allows you to <strong>test the same queries that Large Language Models (LLMs) would use</strong> in a
+                        Retrieval-Augmented Generation (RAG) workflow. When an AI assistant needs to answer questions about your data:
+                    </p>
+                    <ul>
+                        <li><strong>Step 1:</strong> The assistant converts your question into a search query</li>
+                        <li><strong>Step 2:</strong> The MCP server retrieves relevant document chunks using semantic search</li>
+                        <li><strong>Step 3:</strong> Retrieved context is passed to the LLM to generate an informed answer</li>
+                    </ul>
+
+                    <!-- RAG Workflow Diagram -->
+                    <div style="background: var(--color-main-background); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 24px; margin: 24px 0; overflow-x: auto;">
+                        <div style="text-align: center; font-weight: 600; margin-bottom: 20px; color: var(--color-primary-element); font-size: 16px;">
+                            MCP Sampling RAG Workflow
+                        </div>
+
+                        <!-- Four-component bidirectional flow -->
+                        <div style="max-width: 1000px; margin: 0 auto;">
+                            <div style="display: grid; grid-template-columns: 0.7fr auto 1fr auto 1fr auto 0.9fr; gap: 10px; align-items: center;">
+                                <!-- User -->
+                                <div style="background: var(--color-background-hover); border: 2px solid var(--color-border); border-radius: var(--border-radius-large); padding: 14px; text-align: center;">
+                                    <div style="font-size: 26px; margin-bottom: 5px;">👤</div>
+                                    <div style="font-weight: 600; color: var(--color-main-text); font-size: 12px;">User</div>
+                                    <div style="font-size: 9px; color: var(--color-text-maxcontrast); font-style: italic; margin-top: 5px; line-height: 1.2;">
+                                        "What are health<br>benefits of coffee?"
+                                    </div>
+                                </div>
+
+                                <!-- Arrow User <-> Client -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-text-maxcontrast);">↔</div>
+                                </div>
+
+                                <!-- MCP Client + LLM (combined) -->
+                                <div style="background: var(--color-primary-element-light); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 12px; text-align: center;">
+                                    <div style="font-weight: 600; color: var(--color-primary-element); font-size: 13px; margin-bottom: 8px;">MCP Client + LLM</div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 8px; margin-bottom: 6px;">
+                                        <div style="font-size: 9px; color: var(--color-text-maxcontrast);">(Claude Code)</div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 8px; border: 2px solid var(--color-primary-element);">
+                                        <div style="font-size: 16px; margin-bottom: 2px;">🧠</div>
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 10px;">Client's LLM</div>
+                                        <div style="font-size: 8px; color: var(--color-text-maxcontrast);">(Claude)</div>
+                                    </div>
+
+                                    <div style="margin-top: 8px; font-size: 8px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                        <strong>Enables RAG:</strong><br>
+                                        Receives context,<br>
+                                        generates answer
+                                    </div>
+                                </div>
+
+                                <!-- Arrow Client <-> Server -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-primary-element);">↔</div>
+                                    <div style="font-size: 7px; color: var(--color-text-maxcontrast); margin-top: 2px; font-weight: 600; line-height: 1.1;">
+                                        Query +<br>
+                                        Sampling
+                                    </div>
+                                </div>
+
+                                <!-- MCP Server -->
+                                <div style="background: var(--color-primary-element-light); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 12px; text-align: center;">
+                                    <div style="font-weight: 600; color: var(--color-primary-element); font-size: 13px; margin-bottom: 8px;">MCP Server</div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">1. Semantic Search</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Vector embeddings<br>
+                                            BM25 Hybrid + RRF
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">2. Retrieve Context</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Top relevant docs<br>
+                                            with scores
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px; margin-bottom: 5px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">3. Format Response</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Document chunks<br>
+                                            with citations
+                                        </div>
+                                    </div>
+
+                                    <div style="background: var(--color-main-background); border-radius: var(--border-radius); padding: 7px;">
+                                        <div style="font-weight: 600; color: var(--color-main-text); font-size: 9px; margin-bottom: 2px;">4. Send to LLM</div>
+                                        <div style="font-size: 7px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                            Via MCP sampling<br>
+                                            for answer generation
+                                        </div>
+                                    </div>
+                                </div>
+
+                                <!-- Arrow Server <-> Nextcloud -->
+                                <div style="text-align: center;">
+                                    <div style="font-size: 20px; color: var(--color-primary-element);">↔</div>
+                                    <div style="font-size: 7px; color: var(--color-text-maxcontrast); margin-top: 2px; font-weight: 600; line-height: 1.1;">
+                                        Retrieve
+                                    </div>
+                                </div>
+
+                                <!-- Nextcloud -->
+                                <div style="background: var(--color-background-hover); border: 2px solid var(--color-border); border-radius: var(--border-radius-large); padding: 12px; text-align: center; position: relative;">
+                                    <img src="/app/static/nextcloud-logo.png" alt="Nextcloud" style="width: 40px; height: 40px; margin-bottom: 6px;" />
+                                    <div style="font-weight: 600; color: var(--color-main-text); font-size: 12px; margin-bottom: 4px;">Nextcloud</div>
+                                    <div style="font-size: 8px; color: var(--color-text-maxcontrast); line-height: 1.2;">
+                                        Notes, Calendar,<br>
+                                        Files, Contacts,<br>
+                                        Deck
+                                    </div>
+                                </div>
+                            </div>
+
+                            <!-- Explanation below diagram -->
+                            <div style="margin-top: 24px; padding: 16px; background: var(--color-background-hover); border-radius: var(--border-radius); border-left: 4px solid var(--color-primary-element);">
+                                <div style="font-size: 12px; color: var(--color-main-text); line-height: 1.6;">
+                                    <strong>How RAG works via MCP Sampling:</strong>
+                                </div>
+                                <ol style="margin: 8px 0 0 0; padding-left: 20px; font-size: 11px; color: var(--color-text-maxcontrast); line-height: 1.6;">
+                                    <li>User asks question through MCP Client</li>
+                                    <li>Client sends query to MCP Server</li>
+                                    <li>Server retrieves relevant document context from Nextcloud</li>
+                                    <li><strong>Server sends context back to Client's LLM</strong> (MCP Sampling)</li>
+                                    <li>Client's LLM generates answer with citations using retrieved context</li>
+                                    <li>Answer returned to user</li>
+                                </ol>
+                                <div style="margin-top: 8px; font-size: 10px; color: var(--color-text-maxcontrast); font-style: italic;">
+                                    The server has no LLM - it only retrieves context. The client's existing LLM is reused for answer generation.
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+
+                    <p style="margin-top: 16px;">
+                        <strong>Key Point:</strong> The MCP server retrieves context but doesn't generate answers itself.
+                        Through <strong>MCP sampling</strong>, it requests the client's LLM to generate responses, giving users
+                        full control over which model is used and ensuring that answer generation happens client-side.
+                    </p>
+
+                    <p>
+                        By using this interface, you can preview search results, understand relevance scores, and verify
+                        that the system retrieves the right information before it reaches the LLM.
+                    </p>
+                </div>
+
+                <!-- Feature Cards -->
+                <h2>Available Features</h2>
+                <div class="feature-grid">
+                    <a href="#" @click.prevent="activeSection = 'user-info'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                            </svg>
+                        </div>
+                        <h3>User Information</h3>
+                        <p>
+                            View your authentication details, session information, and IdP profile.
+                            Manage background access permissions.
+                        </p>
+                    </a>
+
+                    <a href="#" @click.prevent="activeSection = 'vector-sync'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                            </svg>
+                        </div>
+                        <h3>Vector Sync Status</h3>
+                        <p>
+                            Monitor real-time indexing progress with metrics for indexed documents, pending queue,
+                            and synchronization status.
+                        </p>
+                    </a>
+
+                    <a href="#" @click.prevent="activeSection = 'vector-viz'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                            </svg>
+                        </div>
+                        <h3>Vector Visualization</h3>
+                        <p>
+                            Interactive search interface with 2D PCA visualization. Compare algorithms,
+                            view relevance scores, and explore matched document chunks.
+                        </p>
+                    </a>
+                </div>
+
+                {% else %}
+                <!-- Vector Sync Disabled Content -->
+                <div class="warning">
+                    <h3 style="margin-top: 0;">Vector Sync is Disabled</h3>
+                    <p>
+                        Semantic search and vector visualization features are currently disabled.
+                        To enable these features, set <code>VECTOR_SYNC_ENABLED=true</code> in your environment configuration.
+                    </p>
+                    <p style="margin-bottom: 0;">
+                        <strong>Learn more:</strong>
+                        <a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank" rel="noopener noreferrer" style="color: inherit; text-decoration: underline;">
+                            Configuration Guide
+                        </a>
+                    </p>
+                </div>
+
+                <!-- Limited Feature Card -->
+                <h2>Available Features</h2>
+                <div class="feature-grid">
+                    <a href="#" @click.prevent="activeSection = 'user-info'" class="feature-card">
+                        <div class="feature-icon">
+                            <svg viewBox="0 0 24 24">
+                                <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                            </svg>
+                        </div>
+                        <h3>User Information</h3>
+                        <p>
+                            View your authentication details, session information, and IdP profile.
+                            Manage background access permissions.
+                        </p>
+                    </a>
+                </div>
+                {% endif %}
+
+                <!-- Documentation Section -->
+                <div class="info-section" style="margin-top: 40px;">
+                    <h2>Documentation</h2>
+                    <p>
+                        For detailed information about configuration, authentication modes, and advanced features,
+                        please refer to the project documentation:
+                    </p>
+                    <ul>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/installation.md" target="_blank" rel="noopener noreferrer">Installation Guide</a></li>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank" rel="noopener noreferrer">Configuration Options</a></li>
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/authentication.md" target="_blank" rel="noopener noreferrer">Authentication Modes</a></li>
+                        {% if vector_sync_enabled %}
+                        <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/user-guide/vector-sync-ui.md" target="_blank" rel="noopener noreferrer">Vector Sync UI Guide</a></li>
+                        {% endif %}
+                    </ul>
+                </div>
+            </div>
+
+            <!-- User Info Section -->
+            <div x-show="activeSection === 'user-info'">
+                <div class="content-section">
+                    <h1>User Information</h1>
+                    {{ user_info_tab_html|safe }}
+                </div>
+            </div>
+
+            {% if show_vector_sync_tab %}
+            <!-- Vector Sync Section -->
+            <div x-show="activeSection === 'vector-sync'">
+                <div class="content-section">
+                    <h1>Vector Sync Status</h1>
+                    {{ vector_sync_tab_html|safe }}
+                </div>
+            </div>
+
+            <!-- Vector Viz Section -->
+            <div x-show="activeSection === 'vector-viz'">
+                <div class="content-section">
+                    <h1>Vector Visualization</h1>
+                    <div hx-get="/app/vector-viz" hx-trigger="load" hx-swap="outerHTML">
+                        <p style="color: #999;">Loading vector visualization...</p>
+                    </div>
+                </div>
+            </div>
+            {% endif %}
+
+            {% if show_webhooks_tab %}
+            <!-- Webhooks Section -->
+            <div x-show="activeSection === 'webhooks'">
+                <div class="content-section">
+                    <h1>Webhook Management</h1>
+                    {{ webhooks_tab_html|safe }}
+                </div>
+            </div>
+            {% endif %}
+        </div>
+    </main>
+</div>
+
+<script>
+    // Set global Nextcloud base URL for use in external JS (tojson emits a correctly quoted, script-safe JS string literal)
+    window.NEXTCLOUD_BASE_URL = {{ (nextcloud_host_for_links or '')|tojson }};
+</script>
+<script src="/app/static/vector-viz.js"></script>
+{% endblock %}
@@ -0,0 +1,180 @@
+<div x-data="vizApp()">
+    <div class="viz-layout">
+        <!-- Top: Search Controls -->
+        <div class="viz-card viz-controls-card">
+            <form @submit.prevent="executeSearch">
+                <div class="viz-controls-grid">
+                    <div class="viz-control-group">
+                        <label for="viz-query">Search Query</label>
+                        <input id="viz-query" type="text" x-model="query" placeholder="Enter search query..." required />
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label for="viz-algorithm">Algorithm</label>
+                        <select id="viz-algorithm" x-model="algorithm">
+                            <option value="semantic">Semantic (Dense)</option>
+                            <option value="bm25_hybrid" selected>BM25 Hybrid</option>
+                        </select>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label for="viz-fusion">Fusion</label>
+                        <select id="viz-fusion" x-model="fusion" :disabled="algorithm !== 'bm25_hybrid'" :style="algorithm !== 'bm25_hybrid' ? 'opacity: 0.5; cursor: not-allowed;' : ''">
+                            <option value="rrf" selected>RRF</option>
+                            <option value="dbsf">DBSF</option>
+                        </select>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>&nbsp;</label>
+                        <button type="submit" class="viz-btn">Search</button>
+                    </div>
+
+                    <div class="viz-control-group">
+                        <label>&nbsp;</label>
+                        <button type="button" class="viz-btn-secondary" @click="showAdvanced = !showAdvanced">
+                            <span x-text="showAdvanced ? 'Hide' : 'Advanced'"></span>
+                        </button>
+                    </div>
+                </div>
+
+                <!-- Advanced Options (Collapsible) -->
+                <div x-show="showAdvanced" style="margin-top: 16px;">
+                    <div class="viz-controls-grid" style="grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));">
+                        <div class="viz-control-group">
+                            <label>Document Types</label>
+                            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 8px; font-size: 13px;">
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="" style="margin-right: 4px;">
+                                    <span>All</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="note" style="margin-right: 4px;">
+                                    <span>Notes</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="file" style="margin-right: 4px;">
+                                    <span>Files</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="calendar" style="margin-right: 4px;">
+                                    <span>Calendar</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="contact" style="margin-right: 4px;">
+                                    <span>Contacts</span>
+                                </label>
+                                <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
+                                    <input type="checkbox" x-model="docTypes" value="deck" style="margin-right: 4px;">
+                                    <span>Deck</span>
+                                </label>
+                            </div>
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label for="viz-score-threshold">Score Threshold</label>
+                            <input id="viz-score-threshold" type="number" x-model.number="scoreThreshold" min="0" max="1" step="any" />
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label for="viz-result-limit">Result Limit</label>
+                            <input id="viz-result-limit" type="number" x-model.number="limit" min="1" max="1000" />
+                        </div>
+
+                        <div class="viz-control-group">
+                            <label>Display Options</label>
+                            <label style="display: flex; align-items: center; cursor: pointer; font-weight: normal; margin-top: 4px;">
+                                <input type="checkbox" x-model="showQueryPoint" @change="updatePlot()" style="margin-right: 6px;">
+                                <span>Show Query Point</span>
+                            </label>
+                        </div>
+                    </div>
+                </div>
+            </form>
+        </div>
+
+        <!-- Plot -->
+        <div class="viz-card viz-card-plot">
+            <div id="viz-plot-container">
+                <div x-show="loading" class="viz-loading-overlay" x-transition.opacity.duration.200ms>
+                    Executing search and computing PCA projection...
+                </div>
+                <div id="viz-plot" x-show="!loading" x-transition.opacity.duration.200ms></div>
+            </div>
+        </div>
+
+        <!-- Results -->
+        <div class="viz-card" style="flex: 0 0 auto;">
+            <h3 style="margin-top: 0;">Search Results (<span x-text="loading ? '...' : results.length"></span>)</h3>
+
+        <div x-show="loading" class="viz-loading" x-transition.opacity.duration.200ms>
+            Loading results...
+        </div>
+
+        <div x-show="!loading && results.length === 0" class="viz-no-results" x-transition.opacity.duration.200ms>
+            No results found. Try a different query or adjust your search parameters.
+        </div>
+
+        <template x-if="!loading && results.length > 0">
+            <div x-transition.opacity.duration.200ms>
+                <template x-for="result in results" :key="`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`">
+                    <div style="padding: 12px; border-bottom: 1px solid #eee;">
+                        <a :href="getNextcloudUrl(result)" target="_blank" style="font-weight: 500; color: #0066cc; text-decoration: none;">
+                            <span x-text="result.title"></span>
+                        </a>
+                        <div style="font-size: 14px; color: #666; margin-top: 4px;"
+                             x-text="(result.excerpt || '').length > 200 ? result.excerpt.substring(0, 200) + '...' : (result.excerpt || '')"></div>
+                        <div style="font-size: 12px; color: #999; margin-top: 4px;"><!-- null guards: a missing excerpt/score must not throw while rendering the row -->
+                            Raw Score: <span x-text="result.original_score != null ? result.original_score.toFixed(3) : 'n/a'"></span>
+                            (<span x-text="(result.score * 100).toFixed(0)"></span>% relative) |
+                            Type: <span x-text="result.doc_type"></span>
+                        </div>
+
+                        <!-- Show Chunk button (only if chunk position is available) -->
+                        <template x-if="hasChunkPosition(result)">
+                            <button
+                                class="chunk-toggle-btn"
+                                @click="toggleChunk(result)"
+                                x-text="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`) ? 'Hide Chunk' : 'Show Chunk'"
+                            ></button>
+                        </template>
+
+                        <!-- Chunk context (expanded inline) -->
+                        <template x-if="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`)">
+                            <div class="chunk-context" x-transition.opacity.duration.200ms>
+                                <template x-if="chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
+                                    <div style="color: #666; font-style: italic;">Loading chunk...</div>
+                                </template>
+                                <template x-if="!chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
+                                    <div>
+                                        <!-- Highlighted page image for PDFs -->
+                                        <template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image">
+                                            <div class="chunk-image-container">
+                                                <div class="chunk-image-header">
+                                                    <span>Page <span x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"></span></span>
+                                                </div>
+                                                <img
+                                                    :src="'data:image/png;base64,' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image"
+                                                    :alt="'Page ' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"
+                                                    class="chunk-highlighted-image"
+                                                />
+                                            </div>
+                                        </template>
+                                        <!-- Text context -->
+                                        <template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_before">
+                                            <span class="chunk-ellipsis">...</span>
+                                        </template>
+                                        <span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.before_context"></span><span class="chunk-matched" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.chunk_text"></span><span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.after_context"></span><template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_after">
+                                            <span class="chunk-ellipsis">...</span>
+                                        </template>
+                                    </div>
+                                </template>
+                            </div>
+                        </template>
+                    </div>
+                </template>
+            </div>
+        </template>
+        </div><!-- Search Results -->
+    </div><!-- .viz-layout -->
+</div><!-- x-data="vizApp()" -->
@@ -0,0 +1,392 @@
+{% extends "base.html" %}
+
+{% block title %}Welcome - Nextcloud MCP Server{% endblock %}
+
+{% block extra_head %}
+    <!-- Alpine.js for interactive elements. NOTE(review): the "3.x.x" CDN tag presumably floats to the newest 3.x release and the script has no integrity attribute; consider pinning an exact version. -->
+    <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
+{% endblock %}
+
+{% block extra_styles %}
+    /* Welcome page specific styles */
+    .hero-section {
+        background: linear-gradient(135deg, var(--color-primary-element) 0%, #0082c9 100%);
+        color: white;
+        padding: 60px 24px;
+        margin: -24px -24px 40px -24px;
+        border-radius: 0 0 var(--border-radius-large) var(--border-radius-large);
+        text-align: center;
+    }
+
+    .hero-section h1 {
+        color: white;
+        font-size: 36px;
+        margin: 0 0 16px 0;
+        font-weight: 600;
+    }
+
+    .hero-section p {
+        font-size: 18px;
+        opacity: 0.95;
+        max-width: 700px;
+        margin: 0 auto;
+        line-height: 1.6;
+    }
+
+    .feature-grid {
+        display: grid;
+        grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
+        gap: 24px;
+        margin: 32px 0;
+    }
+
+    .feature-card {
+        background: var(--color-main-background);
+        border: 2px solid var(--color-border);
+        border-radius: var(--border-radius-large);
+        padding: 24px;
+        transition: all 0.2s;
+        cursor: pointer;
+        text-decoration: none;
+        color: inherit;
+        display: block;
+    }
+
+    .feature-card:hover {
+        border-color: var(--color-primary-element);
+        box-shadow: 0 4px 12px rgba(0, 103, 158, 0.15);
+        transform: translateY(-2px);
+    }
+
+    .feature-card h3 {
+        color: var(--color-primary-element);
+        font-size: 20px;
+        margin: 12px 0 8px 0;
+        font-weight: 600;
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .feature-card p {
+        color: var(--color-text-maxcontrast);
+        font-size: 14px;
+        line-height: 1.6;
+        margin: 8px 0 0 0;
+    }
+
+    .feature-icon {
+        width: 48px;
+        height: 48px;
+        background: var(--color-primary-element-light);
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        margin-bottom: 8px;
+    }
+
+    .feature-icon svg {
+        width: 28px;
+        height: 28px;
+        fill: var(--color-primary-element);
+    }
+
+    .info-section {
+        background: var(--color-background-hover);
+        border-radius: var(--border-radius-large);
+        padding: 32px;
+        margin: 32px 0;
+    }
+
+    .info-section h2 {
+        color: var(--color-main-text);
+        font-size: 24px;
+        margin: 0 0 16px 0;
+        border: none;
+        padding: 0;
+    }
+
+    .info-section p {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 12px 0;
+    }
+
+    .info-section ul {
+        margin: 12px 0;
+        padding-left: 24px;
+    }
+
+    .info-section li {
+        color: var(--color-text-maxcontrast);
+        line-height: 1.7;
+        margin: 8px 0;
+    }
+
+    .info-section code {
+        background: var(--color-main-background);
+        padding: 2px 8px;
+        border-radius: var(--border-radius);
+        font-size: 13px;
+    }
+
+    .auth-status {
+        background: var(--color-primary-element-light);
+        border-left: 4px solid var(--color-primary-element);
+        padding: 16px 20px;
+        margin: 24px 0;
+        border-radius: var(--border-radius);
+        display: flex;
+        align-items: center;
+        gap: 12px;
+    }
+
+    .auth-status svg {
+        width: 24px;
+        height: 24px;
+        fill: var(--color-primary-element);
+        flex-shrink: 0;
+    }
+
+    .auth-status-text {
+        flex: 1;
+    }
+
+    .auth-status-text strong {
+        display: block;
+        color: var(--color-main-text);
+        font-size: 14px;
+        margin-bottom: 4px;
+    }
+
+    .auth-status-text span {
+        color: var(--color-text-maxcontrast);
+        font-size: 13px;
+    }
+{% endblock %}
+
+{% block content %}
+<div class="app-content-wrapper">
+    <!-- Main Content Area -->
+    <main id="app-content">
+        <div class="page-content">
+            <!-- Hero Section -->
+            <div class="hero-section">
+                <h1>Welcome to Nextcloud MCP Server</h1>
+                <p>
+                    Interactive user interface for semantic search and document retrieval.
+                    Test queries, visualize results, and explore your Nextcloud content using RAG workflows.
+                </p>
+            </div>
+
+            <!-- Authentication Status -->
+            <div class="auth-status">
+                <svg viewBox="0 0 24 24">
+                    <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                </svg>
+                <div class="auth-status-text">{# |e keeps these values HTML-safe even when the Jinja environment renders without autoescape #}
+                    <strong>Authenticated as: {{ username|e }}</strong>
+                    <span>Authentication mode: <code>{{ auth_mode|e }}</code></span>
+                </div>
+            </div>
+
+            {% if vector_sync_enabled %}
+            <!-- Vector Sync Enabled Content -->
+            <div class="info-section">
+                <h2>About Semantic Search</h2>
+                <p>
+                    This interface provides access to <strong>semantic search</strong> capabilities powered by vector embeddings.
+                    Unlike traditional keyword search, semantic search understands the <em>meaning</em> of your queries and finds
+                    conceptually similar content across your Nextcloud apps.
+                </p>
+                <p>
+                    <strong>How it works:</strong>
+                </p>
+                <ul>
+                    <li>Documents from Notes, Calendar, Files, Contacts, and Deck are indexed into a vector database</li>
+                    <li>Each document chunk is converted to a 768-dimensional vector embedding that captures semantic meaning</li>
+                    <li>Queries are also converted to embeddings and matched against document vectors using similarity search</li>
+                    <li>Results can be retrieved using pure semantic search or hybrid BM25 search combining keywords and semantics</li>
+                </ul>
+            </div>
+
+            <div class="info-section">
+                <h2>RAG Workflow Integration</h2>
+                <p>
+                    This UI allows you to <strong>test the same queries that Large Language Models (LLMs) would use</strong> in a
+                    Retrieval-Augmented Generation (RAG) workflow. When an AI assistant needs to answer questions about your data:
+                </p>
+                <ul>
+                    <li><strong>Step 1:</strong> The assistant converts your question into a search query</li>
+                    <li><strong>Step 2:</strong> The MCP server retrieves relevant document chunks using semantic search</li>
+                    <li><strong>Step 3:</strong> Retrieved context is passed to the LLM to generate an informed answer</li>
+                </ul>
+
+                <!-- RAG Workflow Diagram -->
+                <div style="background: var(--color-main-background); border: 2px solid var(--color-primary-element); border-radius: var(--border-radius-large); padding: 24px; margin: 24px 0; font-family: 'SFMono-Regular', 'Consolas', 'Liberation Mono', 'Menlo', monospace; font-size: 13px; line-height: 1.8; overflow-x: auto;">
+                    <div style="text-align: center; font-weight: 600; margin-bottom: 16px; color: var(--color-primary-element); font-size: 14px;">
+                        MCP Sampling RAG Workflow
+                    </div>
+                    <pre style="margin: 0; color: var(--color-main-text);">
+┌─────────────────┐
+│   <strong>MCP Client</strong>   │  User asks: "What are health benefits of coffee?"
+│  (Claude Code)  │
+└────────┬────────┘
+         │ (1) User question
+         ↓
+┌────────────────────────────────────────────────────────────────────────┐
+│                      <strong>Nextcloud MCP Server</strong>                          │
+│  ┌──────────────────────────────────────────────────────────────────┐  │
+│  │ <strong>nc_semantic_search_answer</strong> Tool (MCP Sampling-enabled)      │  │
+│  │                                                                  │  │
+│  │  (2) Semantic Search                                             │  │
+│  │  ┌────────────────────────────────────────────────────────┐     │  │
+│  │  │ Query: "health benefits of coffee"                     │     │  │
+│  │  │ → Convert to 768D vector embedding                     │     │  │
+│  │  │ → Search Qdrant (BM25 Hybrid + RRF fusion)             │     │  │
+│  │  │ → Retrieve top 5 relevant document chunks              │     │  │
+│  │  └────────────────────────────────────────────────────────┘     │  │
+│  │                                                                  │  │
+│  │  (3) Construct Prompt with Context                               │  │
+│  │  ┌────────────────────────────────────────────────────────┐     │  │
+│  │  │ "What are health benefits of coffee?                   │     │  │
+│  │  │                                                         │     │  │
+│  │  │  Documents:                                             │     │  │
+│  │  │  - [MED-2155] Effects of habitual coffee consumption...│     │  │
+│  │  │  - [MED-1646] Beverage consumption guidance...         │     │  │
+│  │  │  - [MED-1627] Coffee and depression risk...            │     │  │
+│  │  │  ...                                                    │     │  │
+│  │  │                                                         │     │  │
+│  │  │  Provide answer with citations."                        │     │  │
+│  │  └────────────────────────────────────────────────────────┘     │  │
+│  │                                                                  │  │
+│  │  (4) MCP Sampling Request                                        │  │
+│  │  ─────────────────────────────────────────────────────────────> │  │
+│  └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+         │
+         │ Sampling request with prompt + context
+         ↓
+┌─────────────────┐
+│   <strong>MCP Client</strong>   │  (5) Client's LLM generates answer using retrieved context
+│    (Claude)     │      → "Coffee consumption (2-3 cups/day) is associated with
+└────────┬────────┘         reduced risk of type 2 diabetes, cardiovascular disease,
+         │                  and improved liver health (Document 1, 2)..."
+         │
+         │ (6) Answer with citations
+         ↓
+┌─────────────────┐
+│      User       │  Receives comprehensive answer with source citations
+└─────────────────┘</pre>
+                </div>
+
+                <p style="margin-top: 16px;">
+                    <strong>Key Point:</strong> The MCP server retrieves context but doesn't generate answers itself.
+                    Through <strong>MCP sampling</strong>, it requests the client's LLM to generate responses, giving users
+                    full control over which model is used and ensuring all processing happens client-side.
+                </p>
+
+                <p>
+                    By using this interface, you can preview search results, understand relevance scores, and verify
+                    that the system retrieves the right information before it reaches the LLM.
+                </p>
+            </div>
+
+            <!-- Feature Cards -->
+            <h2>Available Features</h2>
+            <div class="feature-grid">
+                <a href="/app/user-info" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                        </svg>
+                    </div>
+                    <h3>User Information</h3>
+                    <p>
+                        View your authentication details, session information, and IdP profile.
+                        Manage background access permissions.
+                    </p>
+                </a>
+
+                <a href="/app/user-info#vector-sync" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,18A6,6 0 0,1 6,12C6,11 6.25,10.03 6.7,9.2L5.24,7.74C4.46,8.97 4,10.43 4,12A8,8 0 0,0 12,20V23L16,19L12,15M12,4V1L8,5L12,9V6A6,6 0 0,1 18,12C18,13 17.75,13.97 17.3,14.8L18.76,16.26C19.54,15.03 20,13.57 20,12A8,8 0 0,0 12,4Z" />
+                        </svg>
+                    </div>
+                    <h3>Vector Sync Status</h3>
+                    <p>
+                        Monitor real-time indexing progress with metrics for indexed documents, pending queue,
+                        and synchronization status.
+                    </p>
+                </a>
+
+                <a href="/app/user-info#vector-viz" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M22,21H2V3H4V19H6V10H10V19H12V6H16V19H18V14H22V21Z" />
+                        </svg>
+                    </div>
+                    <h3>Vector Visualization</h3>
+                    <p>
+                        Interactive search interface with 2D PCA visualization. Compare algorithms,
+                        view relevance scores, and explore matched document chunks.
+                    </p>
+                </a>
+            </div>
+
+            {% else %}
+            <!-- Vector Sync Disabled Content -->
+            <div class="warning">
+                <h3 style="margin-top: 0;">Vector Sync is Disabled</h3>
+                <p>
+                    Semantic search and vector visualization features are currently disabled.
+                    To enable these features, set <code>VECTOR_SYNC_ENABLED=true</code> in your environment configuration.
+                </p>
+                <p style="margin-bottom: 0;">
+                    <strong>Learn more:</strong>
+                    <a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank" style="color: inherit; text-decoration: underline;">
+                        Configuration Guide
+                    </a>
+                </p>
+            </div>
+
+            <!-- Limited Feature Card -->
+            <h2>Available Features</h2>
+            <div class="feature-grid">
+                <a href="/app/user-info" class="feature-card">
+                    <div class="feature-icon">
+                        <svg viewBox="0 0 24 24">
+                            <path d="M12,4A4,4 0 0,1 16,8A4,4 0 0,1 12,12A4,4 0 0,1 8,8A4,4 0 0,1 12,4M12,14C16.42,14 20,15.79 20,18V20H4V18C4,15.79 7.58,14 12,14Z" />
+                        </svg>
+                    </div>
+                    <h3>User Information</h3>
+                    <p>
+                        View your authentication details, session information, and IdP profile.
+                        Manage background access permissions.
+                    </p>
+                </a>
+            </div>
+            {% endif %}
+
+            <!-- Documentation Section -->
+            <div class="info-section" style="margin-top: 40px;">
+                <h2>Documentation</h2>
+                <p>
+                    For detailed information about configuration, authentication modes, and advanced features,
+                    please refer to the project documentation:
+                </p>
+                <ul>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/installation.md" target="_blank">Installation Guide</a></li>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/configuration.md" target="_blank">Configuration Options</a></li>
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/authentication.md" target="_blank">Authentication Modes</a></li>
+                    {% if vector_sync_enabled %}
+                    <li><a href="https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/docs/user-guide/vector-sync-ui.md" target="_blank">Vector Sync UI Guide</a></li>
+                    {% endif %}
+                </ul>
+            </div>
+        </div>
+    </main>
+</div>
+{% endblock %}
@@ -14,11 +14,11 @@ The Token Broker provides:
 - Session vs background token separation (RFC 8693)
 """

-import asyncio
 import logging
 from datetime import datetime, timedelta, timezone
 from typing import Dict, Optional, Tuple

+import anyio
 import httpx
 import jwt
 from cryptography.fernet import Fernet
@@ -43,7 +43,7 @@ class TokenCache:
        self._cache: Dict[str, Tuple[str, datetime]] = {}
        self._ttl = timedelta(seconds=ttl_seconds)
        self._early_refresh = timedelta(seconds=early_refresh_seconds)
-        self._lock = asyncio.Lock()
+        self._lock = anyio.Lock()

    async def get(self, user_id: str) -> Optional[str]:
        """Get cached token if valid."""
@@ -9,24 +9,38 @@ For OAuth mode: Requires browser-based OAuth login to establish session.

 import logging
 import os
+from pathlib import Path
 from typing import Any

 import httpx
+from jinja2 import Environment, FileSystemLoader
 from starlette.authentication import requires
 from starlette.requests import Request
 from starlette.responses import HTMLResponse, JSONResponse

+from nextcloud_mcp_server.client import NextcloudClient
+
 logger = logging.getLogger(__name__)

+# Setup Jinja2 environment for templates
+_template_dir = Path(__file__).parent / "templates"
+_jinja_env = Environment(loader=FileSystemLoader(_template_dir))

-async def _get_authenticated_client_for_userinfo(request: Request) -> httpx.AsyncClient:
-    """Get an authenticated HTTP client for user info page operations.
+
+async def _get_authenticated_client_for_userinfo(request: Request) -> NextcloudClient:
+    """Get an authenticated Nextcloud client for user info page operations.
+
+    This is a shared helper for authenticated routes that need to access
+    Nextcloud APIs. It handles both BasicAuth and OAuth authentication modes.

    Args:
        request: Starlette request object

    Returns:
-        Authenticated httpx.AsyncClient
+        Authenticated NextcloudClient
+
+    Raises:
+        RuntimeError: If credentials/session not configured
    """
    oauth_ctx = getattr(request.app.state, "oauth_context", None)

@@ -39,11 +53,15 @@ async def _get_authenticated_client_for_userinfo(request: Request) -> httpx.Asyn
        if not all([nextcloud_host, username, password]):
            raise RuntimeError("BasicAuth credentials not configured")

-        assert nextcloud_host is not None  # Type narrowing for type checker
-        return httpx.AsyncClient(
+        from httpx import BasicAuth
+
+        assert nextcloud_host is not None
+        assert username is not None
+        assert password is not None
+        return NextcloudClient(
            base_url=nextcloud_host,
-            auth=(username, password),
-            timeout=30.0,
+            username=username,
+            auth=BasicAuth(username, password),
        )

    # OAuth mode - get token from session
@@ -58,15 +76,14 @@ async def _get_authenticated_client_for_userinfo(request: Request) -> httpx.Asyn
        raise RuntimeError("No access token found in session")

    access_token = token_data["access_token"]
+    username = token_data.get("username")
    nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")

-    if not nextcloud_host:
-        raise RuntimeError("Nextcloud host not configured")
+    if not nextcloud_host or not username:
+        raise RuntimeError("Nextcloud host or username not configured")

-    return httpx.AsyncClient(
-        base_url=nextcloud_host,
-        headers={"Authorization": f"Bearer {access_token}"},
-        timeout=30.0,
+    return NextcloudClient.from_token(
+        base_url=nextcloud_host, token=access_token, username=username
    )


@@ -417,10 +434,10 @@ async def user_info_html(request: Request) -> HTMLResponse:
    try:
        from nextcloud_mcp_server.auth.permissions import is_nextcloud_admin

-        # Get authenticated HTTP client
-        http_client = await _get_authenticated_client_for_userinfo(request)
-        is_admin = await is_nextcloud_admin(request, http_client)
-        await http_client.aclose()
+        # Get authenticated Nextcloud client
+        nc_client = await _get_authenticated_client_for_userinfo(request)
+        is_admin = await is_nextcloud_admin(request, nc_client._client)
+        await nc_client.close()
    except Exception as e:
        logger.warning(f"Failed to check admin status: {e}")
        # Default to not admin if check fails
@@ -431,51 +448,14 @@ async def user_info_html(request: Request) -> HTMLResponse:
        oauth_ctx = getattr(request.app.state, "oauth_context", None)
        login_url = str(request.url_for("oauth_login")) if oauth_ctx else "/oauth/login"

-        error_html = f"""
-        <!DOCTYPE html>
-        <html lang="en">
-        <head>
-            <meta charset="UTF-8">
-            <meta name="viewport" content="width=device-width, initial-scale=1.0">
-            <title>Error - Nextcloud MCP Server</title>
-            <style>
-                body {{
-                    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
-                    max-width: 800px;
-                    margin: 50px auto;
-                    padding: 20px;
-                    background-color: #f5f5f5;
-                }}
-                .container {{
-                    background: white;
-                    border-radius: 8px;
-                    padding: 30px;
-                    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-                }}
-                h1 {{
-                    color: #d32f2f;
-                    margin-top: 0;
-                }}
-                .error {{
-                    background-color: #ffebee;
-                    border-left: 4px solid #d32f2f;
-                    padding: 15px;
-                    margin: 20px 0;
-                }}
-            </style>
-        </head>
-        <body>
-            <div class="container">
-                <h1>Error Retrieving User Info</h1>
-                <div class="error">
-                    <strong>Error:</strong> {user_context["error"]}
-                </div>
-                <p><a href="{login_url}">Login again</a></p>
-            </div>
-        </body>
-        </html>
-        """
-        return HTMLResponse(content=error_html)
+        template = _jinja_env.get_template("error.html")
+        return HTMLResponse(
+            content=template.render(
+                error_title="Error Retrieving User Info",
+                error_message=user_context["error"],
+                login_url=login_url,
+            )
+        )

    # Build HTML response
    auth_mode = user_context.get("auth_mode", "unknown")
@@ -489,6 +469,16 @@ async def user_info_html(request: Request) -> HTMLResponse:
            str(request.url_for("oauth_logout")) if oauth_ctx else "/oauth/logout"
        )

+    # Get Nextcloud host for generating links to apps (used by viz tab)
+    # Use public issuer URL if available (for browser-accessible links),
+    # otherwise fall back to NEXTCLOUD_HOST from settings
+    from nextcloud_mcp_server.config import get_settings
+
+    settings = get_settings()
+    nextcloud_host_for_links = (
+        os.getenv("NEXTCLOUD_PUBLIC_ISSUER_URL") or settings.nextcloud_host
+    )
+
    # Build host info HTML (BasicAuth only)
    host_info_html = ""
    if auth_mode == "basic":
@@ -644,264 +634,26 @@ async def user_info_html(request: Request) -> HTMLResponse:
            </div>
        """

-    html_content = f"""
-    <!DOCTYPE html>
-    <html lang="en">
-    <head>
-        <meta charset="UTF-8">
-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
-        <title>Nextcloud MCP Server</title>
+    # Check if vector sync is enabled (needed for Welcome tab)
+    vector_sync_enabled = os.getenv("VECTOR_SYNC_ENABLED", "false").lower() == "true"

-        <!-- htmx for dynamic loading -->
-        <script src="https://unpkg.com/htmx.org@1.9.10"></script>
-
-        <!-- Alpine.js for tab state management -->
-        <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
-
-        <style>
-            body {{
-                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
-                max-width: 900px;
-                margin: 50px auto;
-                padding: 20px;
-                background-color: #f5f5f5;
-            }}
-            .container {{
-                background: white;
-                border-radius: 8px;
-                padding: 30px;
-                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
-                min-height: calc(100vh - 200px);
-            }}
-            h1 {{
-                color: #0082c9;
-                margin-top: 0;
-                border-bottom: 2px solid #0082c9;
-                padding-bottom: 10px;
-            }}
-            h2 {{
-                color: #333;
-                margin-top: 20px;
-                border-bottom: 1px solid #e0e0e0;
-                padding-bottom: 5px;
-            }}
-
-            /* Tab navigation */
-            .tabs {{
-                display: flex;
-                gap: 0;
-                margin: 20px 0 0 0;
-                border-bottom: 2px solid #e0e0e0;
-            }}
-            .tab {{
-                padding: 12px 24px;
-                cursor: pointer;
-                background: transparent;
-                border: none;
-                font-size: 14px;
-                font-weight: 500;
-                color: #666;
-                border-bottom: 2px solid transparent;
-                margin-bottom: -2px;
-                transition: all 0.2s;
-            }}
-            .tab:hover {{
-                color: #0082c9;
-                background-color: #f5f5f5;
-            }}
-            .tab.active {{
-                color: #0082c9;
-                border-bottom-color: #0082c9;
-            }}
-
-            /* Tab content - use grid to overlay panes */
-            .tab-content {{
-                padding: 20px 0;
-                display: grid;
-            }}
-
-            /* Tab panes - all occupy the same grid cell to overlay */
-            .tab-pane {{
-                grid-area: 1 / 1;
-            }}
-
-            /* Tables */
-            table {{
-                width: 100%;
-                border-collapse: collapse;
-                margin: 15px 0;
-            }}
-            td {{
-                padding: 10px;
-                border-bottom: 1px solid #e0e0e0;
-            }}
-            td:first-child {{
-                width: 200px;
-                color: #666;
-            }}
-            code {{
-                background-color: #f5f5f5;
-                padding: 2px 6px;
-                border-radius: 3px;
-                font-family: 'Courier New', monospace;
-            }}
-
-            /* Badges */
-            .badge {{
-                display: inline-block;
-                padding: 3px 8px;
-                border-radius: 12px;
-                font-size: 12px;
-                font-weight: bold;
-                text-transform: uppercase;
-            }}
-            .badge-oauth {{
-                background-color: #4caf50;
-                color: white;
-            }}
-            .badge-basic {{
-                background-color: #2196f3;
-                color: white;
-            }}
-
-            /* Messages */
-            .warning {{
-                background-color: #fff3cd;
-                border-left: 4px solid #ffc107;
-                padding: 15px;
-                margin: 15px 0;
-                color: #856404;
-            }}
-            .info-message {{
-                background-color: #e3f2fd;
-                border-left: 4px solid #2196f3;
-                padding: 15px;
-                margin: 15px 0;
-                color: #1565c0;
-            }}
-
-            /* Buttons */
-            .button {{
-                display: inline-block;
-                padding: 10px 20px;
-                background-color: #d32f2f;
-                color: white;
-                text-decoration: none;
-                border-radius: 4px;
-                transition: background-color 0.3s;
-                border: none;
-                cursor: pointer;
-                font-size: 14px;
-            }}
-            .button:hover {{
-                background-color: #b71c1c;
-            }}
-            .button-primary {{
-                background-color: #0082c9;
-            }}
-            .button-primary:hover {{
-                background-color: #006ba3;
-            }}
-
-            /* Logout section */
-            .logout {{
-                margin-top: 30px;
-                padding-top: 20px;
-                border-top: 1px solid #e0e0e0;
-            }}
-
-            /* Smooth htmx content swaps */
-            .htmx-swapping {{
-                opacity: 0;
-                transition: opacity 200ms ease-out;
-            }}
-
-            /* Smooth htmx content settling */
-            .htmx-settling {{
-                opacity: 1;
-                transition: opacity 200ms ease-in;
-            }}
-        </style>
-    </head>
-    <body>
-        <div class="container" x-data="{{ activeTab: 'user-info' }}">
-            <h1>Nextcloud MCP Server</h1>
-
-            <!-- Tab Navigation -->
-            <div class="tabs">
-                <button
-                    class="tab"
-                    :class="activeTab === 'user-info' ? 'active' : ''"
-                    @click="activeTab = 'user-info'">
-                    User Info
-                </button>
-                {
-        ""
-        if not show_vector_sync_tab
-        else '''
-                <button
-                    class="tab"
-                    :class="activeTab === 'vector-sync' ? 'active' : ''"
-                    @click="activeTab = 'vector-sync'">
-                    Vector Sync
-                </button>
-                '''
-    }
-                {
-        ""
-        if not show_webhooks_tab
-        else '''
-                <button
-                    class="tab"
-                    :class="activeTab === 'webhooks' ? 'active' : ''"
-                    @click="activeTab = 'webhooks'">
-                    Webhooks
-                </button>
-                '''
-    }
-            </div>
-
-            <!-- Tab Content -->
-            <div class="tab-content">
-                <!-- User Info Tab -->
-                <div class="tab-pane" x-show="activeTab === 'user-info'" x-transition.opacity.duration.150ms>
-                    {user_info_tab_html}
-                </div>
-
-                {
-        ""
-        if not show_vector_sync_tab
-        else f'''
-                <!-- Vector Sync Tab -->
-                <div class="tab-pane" x-show="activeTab === 'vector-sync'" x-transition.opacity.duration.150ms>
-                    {vector_sync_tab_html}
-                </div>
-                '''
-    }
-
-                {
-        ""
-        if not show_webhooks_tab
-        else f'''
-                <!-- Webhooks Tab (admin-only, loaded dynamically) -->
-                <div class="tab-pane" x-show="activeTab === 'webhooks'" x-transition.opacity.duration.150ms>
-                    {webhooks_tab_html}
-                </div>
-                '''
-    }
-            </div>
-
-            {
-        f'<div class="logout"><a href="{logout_url}" class="button">Logout</a></div>'
-        if auth_mode == "oauth"
-        else ""
-    }
-        </div>
-    </body>
-    </html>
-    """
-
-    return HTMLResponse(content=html_content)
+    # Render template
+    template = _jinja_env.get_template("user_info.html")
+    return HTMLResponse(
+        content=template.render(
+            user_info_tab_html=user_info_tab_html,
+            vector_sync_tab_html=vector_sync_tab_html,
+            webhooks_tab_html=webhooks_tab_html,
+            show_vector_sync_tab=show_vector_sync_tab,
+            show_webhooks_tab=show_webhooks_tab,
+            logout_url=logout_url if auth_mode == "oauth" else None,
+            nextcloud_host_for_links=nextcloud_host_for_links,
+            # Additional context for Welcome tab
+            vector_sync_enabled=vector_sync_enabled,
+            username=username,
+            auth_mode=auth_mode,
+        )
+    )


@requires("authenticated", redirect="oauth_login")
@@ -921,17 +673,12 @@ async def revoke_session(request: Request) -> HTMLResponse:
    oauth_ctx = getattr(request.app.state, "oauth_context", None)

    if not oauth_ctx:
+        template = _jinja_env.get_template("error.html")
        return HTMLResponse(
-            """
-            <!DOCTYPE html>
-            <html>
-            <head><title>Error</title></head>
-            <body>
-                <h1>Error</h1>
-                <p>OAuth mode not enabled</p>
-            </body>
-            </html>
-            """,
+            content=template.render(
+                error_title="Error",
+                error_message="OAuth mode not enabled",
+            ),
            status_code=400,
        )

@@ -939,17 +686,12 @@ async def revoke_session(request: Request) -> HTMLResponse:
    session_id = request.cookies.get("mcp_session")

    if not storage or not session_id:
+        template = _jinja_env.get_template("error.html")
        return HTMLResponse(
-            """
-            <!DOCTYPE html>
-            <html>
-            <head><title>Error</title></head>
-            <body>
-                <h1>Error</h1>
-                <p>Session not found</p>
-            </body>
-            </html>
-            """,
+            content=template.render(
+                error_title="Error",
+                error_message="Session not found",
+            ),
            status_code=400,
        )

@@ -962,57 +704,26 @@ async def revoke_session(request: Request) -> HTMLResponse:
        # Redirect back to user page
        user_page_url = str(request.url_for("user_info_html"))

+        template = _jinja_env.get_template("success.html")
        return HTMLResponse(
-            f"""
-            <!DOCTYPE html>
-            <html lang="en">
-            <head>
-                <meta charset="UTF-8">
-                <meta http-equiv="refresh" content="2;url={user_page_url}">
-                <title>Background Access Revoked</title>
-                <style>
-                    body {{
-                        font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
-                        max-width: 600px;
-                        margin: 50px auto;
-                        padding: 20px;
-                        text-align: center;
-                    }}
-                    .success {{
-                        background-color: #e8f5e9;
-                        border: 2px solid #4caf50;
-                        padding: 30px;
-                        border-radius: 8px;
-                    }}
-                    h1 {{
-                        color: #4caf50;
-                    }}
-                </style>
-            </head>
-            <body>
-                <div class="success">
-                    <h1>✓ Background Access Revoked</h1>
-                    <p>Your refresh token has been deleted successfully.</p>
-                    <p>Browser session remains active.</p>
-                    <p>Redirecting back to user page...</p>
-                </div>
-            </body>
-            </html>
-            """
+            content=template.render(
+                success_title="✓ Background Access Revoked",
+                success_messages=[
+                    "Your refresh token has been deleted successfully.",
+                    "Browser session remains active.",
+                ],
+                redirect_url=user_page_url,
+                redirect_delay=2,
+            )
        )

    except Exception as e:
        logger.error(f"Failed to revoke background access: {e}")
+        template = _jinja_env.get_template("error.html")
        return HTMLResponse(
-            f"""
-            <!DOCTYPE html>
-            <html>
-            <head><title>Error</title></head>
-            <body>
-                <h1>Error</h1>
-                <p>Failed to revoke background access: {e}</p>
-            </body>
-            </html>
-            """,
+            content=template.render(
+                error_title="Error",
+                error_message=f"Failed to revoke background access: {e}",
+            ),
            status_code=500,
        )
@@ -0,0 +1,669 @@
+"""Vector visualization routes for testing search algorithms.
+
+Provides a web UI for users to test different search algorithms on their own
+indexed documents and visualize results in 3D space using PCA.
+
+All processing happens server-side following ADR-012:
+- Search execution via shared search/algorithms.py
+- Query embedding generation
+- PCA dimensionality reduction (full embedding dimension, e.g. 768 → 3D)
+- Only 3D coordinates + metadata sent to client
+- Bandwidth-efficient (3 floats per doc instead of the full embedding, e.g. 768)
+"""
+
+import logging
+import time
+from pathlib import Path
+
+import numpy as np
+from jinja2 import Environment, FileSystemLoader
+from starlette.authentication import requires
+from starlette.requests import Request
+from starlette.responses import HTMLResponse, JSONResponse
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.observability.tracing import trace_operation
+from nextcloud_mcp_server.search import (
+    BM25HybridSearchAlgorithm,
+    SemanticSearchAlgorithm,
+)
+from nextcloud_mcp_server.vector.pca import PCA
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+# Module-level logger named after this module
+logger = logging.getLogger(__name__)
+
+# Set up the Jinja2 environment; templates are loaded from the "templates"
+# directory that sits next to this module.
+# NOTE(review): no `autoescape` argument is passed here, and Jinja2 leaves
+# autoescaping off unless it is enabled, so values rendered into templates
+# (e.g. the username) are presumably not HTML-escaped unless the template
+# itself escapes them — confirm against the templates.
+_template_dir = Path(__file__).parent / "templates"
+_jinja_env = Environment(loader=FileSystemLoader(_template_dir))
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_html(request: Request) -> HTMLResponse:
+    """Vector visualization page with search controls and interactive plot.
+
+    Provides UI for testing search algorithms with real-time visualization.
+    Requires vector sync to be enabled.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML page with search interface
+    """
+    settings = get_settings()
+
+    if not settings.vector_sync_enabled:
+        return HTMLResponse(
+            """
+            <div>
+                <h2>Vector Visualization</h2>
+                <div style="padding: 20px; background: #fff3cd; border: 1px solid #ffc107; border-radius: 4px;">
+                    Vector sync is not enabled. Set VECTOR_SYNC_ENABLED=true to use this feature.
+                </div>
+            </div>
+            """
+        )
+
+    # Get user info from auth context
+    username = (
+        request.user.display_name
+        if hasattr(request.user, "display_name")
+        else "unknown"
+    )
+
+    # Load and render template
+    template = _jinja_env.get_template("vector_viz.html")
+    html_content = template.render(username=username)
+    return HTMLResponse(content=html_content)
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_search(request: Request) -> JSONResponse:
+    """Execute server-side search and return 3D coordinates + results.
+
+    All processing happens server-side:
+    1. Execute search via shared algorithm module
+    2. Generate query embedding
+    3. Fetch matching vectors from Qdrant
+    4. Apply PCA reduction (768-dim → 3D) to query + documents
+    5. Return coordinates + metadata only
+
+    Args:
+        request: Starlette request with query parameters
+
+    Returns:
+        JSON response with coordinates_3d and results (including query point)
+    """
+    settings = get_settings()
+
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"success": False, "error": "Vector sync not enabled"},
+            status_code=400,
+        )
+
+    # Get user info from auth context
+    username = (
+        request.user.display_name if hasattr(request.user, "display_name") else None
+    )
+
+    if not username:
+        return JSONResponse(
+            {"success": False, "error": "User not authenticated"},
+            status_code=401,
+        )
+
+    # Parse query parameters
+    query = request.query_params.get("query", "")
+    algorithm = request.query_params.get("algorithm", "bm25_hybrid")
+    limit = int(request.query_params.get("limit", "50"))
+    score_threshold = float(request.query_params.get("score_threshold", "0.0"))
+    fusion = request.query_params.get("fusion", "rrf")  # Default to RRF
+
+    # Parse doc_types (comma-separated list, None = all types)
+    doc_types_param = request.query_params.get("doc_types", "")
+    doc_types = doc_types_param.split(",") if doc_types_param else None
+
+    logger.info(
+        f"Viz search: user={username}, query='{query}', "
+        f"algorithm={algorithm}, fusion={fusion}, limit={limit}, doc_types={doc_types}"
+    )
+
+    try:
+        # Start total request timer
+        request_start = time.perf_counter()
+        # Get authenticated HTTP client from session
+        # In BasicAuth mode: uses username/password from session
+        # In OAuth mode: uses access token from session
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            _get_authenticated_client_for_userinfo,
+        )
+
+        with trace_operation("vector_viz.get_auth_client"):
+            auth_client_ctx = await _get_authenticated_client_for_userinfo(request)
+
+        async with auth_client_ctx as nc_client:  # noqa: F841
+            # Create search algorithm (no client needed - verification removed)
+            if algorithm == "semantic":
+                search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+            elif algorithm == "bm25_hybrid":
+                search_algo = BM25HybridSearchAlgorithm(
+                    score_threshold=score_threshold, fusion=fusion
+                )
+            else:
+                return JSONResponse(
+                    {"success": False, "error": f"Unknown algorithm: {algorithm}"},
+                    status_code=400,
+                )
+
+            # Execute search (supports cross-app when doc_types=None)
+            # Get unverified results with buffer for filtering
+            search_start = time.perf_counter()
+            all_results = []
+            if doc_types is None or len(doc_types) == 0:
+                # Cross-app search - search all indexed types
+                with trace_operation(
+                    "vector_viz.search_execute",
+                    attributes={
+                        "search.algorithm": algorithm,
+                        "search.limit": limit * 2,
+                        "search.doc_type": "all",
+                    },
+                ):
+                    unverified_results = await search_algo.search(
+                        query=query,
+                        user_id=username,
+                        limit=limit * 2,  # Buffer for verification filtering
+                        doc_type=None,  # Search all types
+                        score_threshold=score_threshold,
+                    )
+                all_results.extend(unverified_results)
+            else:
+                # Search each document type and combine
+                for doc_type in doc_types:
+                    with trace_operation(
+                        "vector_viz.search_execute",
+                        attributes={
+                            "search.algorithm": algorithm,
+                            "search.limit": limit * 2,
+                            "search.doc_type": doc_type,
+                        },
+                    ):
+                        unverified_results = await search_algo.search(
+                            query=query,
+                            user_id=username,
+                            limit=limit * 2,  # Buffer for verification filtering
+                            doc_type=doc_type,
+                            score_threshold=score_threshold,
+                        )
+                    all_results.extend(unverified_results)
+                # Sort by score before verification
+                all_results.sort(key=lambda r: r.score, reverse=True)
+
+            # No verification needed for visualization - we only need Qdrant metadata
+            # (title, excerpt, doc_type) which is already in search results.
+            # Verification is only needed for sampling (LLM needs full content).
+            search_results = all_results[:limit]
+            search_duration = time.perf_counter() - search_start
+
+        # Store original scores and normalize for visualization
+        # (best result = 1.0, worst result = 0.0 within THIS result set)
+        # This makes visual encoding meaningful regardless of RRF normalization
+        with trace_operation(
+            "vector_viz.score_normalize",
+            attributes={"normalize.num_results": len(search_results)},
+        ):
+            if search_results:
+                scores = [r.score for r in search_results]
+                min_score, max_score = min(scores), max(scores)
+                score_range = max_score - min_score if max_score > min_score else 1.0
+
+                logger.info(
+                    f"Normalizing scores for viz: original range [{min_score:.3f}, {max_score:.3f}] "
+                    f"→ [0.0, 1.0]"
+                )
+
+                # Store original score and rescale to 0-1 for visualization
+                for r in search_results:
+                    # Store original score before normalization
+                    r.original_score = r.score
+                    # Rescale for visual encoding
+                    r.score = (r.score - min_score) / score_range
+
+        if not search_results:
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [],
+                    "coordinates_3d": [],
+                    "query_coords": [],
+                    "message": "No results found",
+                }
+            )
+
+        # Fetch vectors for specific matching chunks from Qdrant using batch retrieve
+        vector_fetch_start = time.perf_counter()
+
+        with trace_operation("vector_viz.get_qdrant_client"):
+            qdrant_client = await get_qdrant_client()
+
+        chunk_vectors_map = {}  # Map (doc_id, chunk_start, chunk_end) -> vector
+
+        # Collect point IDs from search results for batch retrieval
+        # point_id is the Qdrant internal ID returned by search algorithms
+        point_ids = [r.point_id for r in search_results if r.point_id]
+
+        if point_ids:
+            # Single batch retrieve call instead of N sequential scroll calls
+            # This is ~50x faster for 50 results (1 HTTP request vs 50)
+            with trace_operation(
+                "vector_viz.vector_retrieve",
+                attributes={"retrieve.num_points": len(point_ids)},
+            ):
+                points_response = await qdrant_client.retrieve(
+                    collection_name=settings.get_collection_name(),
+                    ids=point_ids,
+                    with_vectors=["dense"],
+                    with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
+                )
+
+            # Build chunk_vectors_map from batch response
+            for point in points_response:
+                if point.vector is not None:
+                    # Extract dense vector (handle both named and unnamed vectors)
+                    if isinstance(point.vector, dict):
+                        vector = point.vector.get("dense")
+                    else:
+                        vector = point.vector
+
+                    if vector is not None and point.payload:
+                        doc_id = point.payload.get("doc_id")
+                        chunk_start = point.payload.get("chunk_start_offset")
+                        chunk_end = point.payload.get("chunk_end_offset")
+                        chunk_key = (doc_id, chunk_start, chunk_end)
+                        chunk_vectors_map[chunk_key] = vector
+
+        vector_fetch_duration = time.perf_counter() - vector_fetch_start
+
+        if len(chunk_vectors_map) < 2:
+            # Not enough chunks for PCA
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [
+                        {
+                            "id": r.id,
+                            "doc_type": r.doc_type,
+                            "title": r.title,
+                            "excerpt": r.excerpt,
+                            "score": r.score,
+                        }
+                        for r in search_results
+                    ],
+                    "coordinates_3d": [[0, 0, 0]] * len(search_results),
+                    "query_coords": [0, 0, 0],
+                    "message": "Not enough chunks for PCA",
+                }
+            )
+
+        # Detect embedding dimension from first available vector
+        embedding_dim = None
+        for vector in chunk_vectors_map.values():
+            if vector is not None:
+                embedding_dim = len(vector)
+                break
+
+        if embedding_dim is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "Could not determine embedding dimension",
+                },
+                status_code=500,
+            )
+
+        logger.info(f"Detected embedding dimension: {embedding_dim}")
+
+        # Build chunk vectors array in search_results order (1:1 mapping)
+        chunk_vectors = []
+        for result in search_results:
+            chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset)
+            if chunk_key in chunk_vectors_map:
+                chunk_vectors.append(chunk_vectors_map[chunk_key])
+            else:
+                # Chunk not found in vectors (shouldn't happen)
+                logger.warning(
+                    f"Chunk {chunk_key} not found in fetched vectors, using zero vector"
+                )
+                # Use zero vector as fallback
+                chunk_vectors.append(np.zeros(embedding_dim))
+
+        chunk_vectors = np.array(chunk_vectors)
+
+        # Reuse query embedding from search algorithm (avoids redundant embedding call)
+        query_embed_start = time.perf_counter()
+        if search_algo.query_embedding is not None:
+            query_embedding = search_algo.query_embedding
+            logger.info(
+                f"Reusing query embedding from search algorithm "
+                f"(dimension={len(query_embedding)})"
+            )
+        else:
+            # Fallback: generate embedding if not available from search
+            from nextcloud_mcp_server.embedding.service import get_embedding_service
+
+            embedding_service = get_embedding_service()
+            query_embedding = await embedding_service.embed(query)
+            logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
+        query_embed_duration = time.perf_counter() - query_embed_start
+
+        # Combine query vector with chunk vectors for PCA
+        # Query will be the last point in the array
+        all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
+
+        # Normalize vectors to unit length (L2 normalization)
+        # This is critical because Qdrant uses COSINE distance, which only measures
+        # vector direction (angle), not magnitude. PCA uses Euclidean distance which
+        # considers both direction and magnitude. By normalizing to unit length,
+        # Euclidean distances in PCA space will match cosine distances.
+        norms = np.linalg.norm(all_vectors, axis=1, keepdims=True)
+
+        # Check for zero-norm vectors (can happen with empty/corrupted embeddings)
+        zero_norm_mask = norms[:, 0] < 1e-10
+        if zero_norm_mask.any():
+            zero_indices = np.where(zero_norm_mask)[0]
+            logger.warning(
+                f"Found {zero_norm_mask.sum()} zero-norm vectors at indices {zero_indices.tolist()}. "
+                "Replacing with small epsilon to avoid division by zero."
+            )
+            # Replace zero norms with small epsilon to avoid NaN
+            norms[zero_norm_mask] = 1e-10
+
+        all_vectors_normalized = all_vectors / norms
+        logger.info(
+            f"Normalized vectors: query_norm={norms[-1][0]:.3f}, "
+            f"doc_norm_range=[{norms[:-1].min():.3f}, {norms[:-1].max():.3f}]"
+        )
+
+        # Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
+        # Run in thread pool to avoid blocking the event loop (CPU-bound)
+        pca_start = time.perf_counter()
+
+        def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
+            pca = PCA(n_components=3)
+            coords = pca.fit_transform(vectors)
+            return coords, pca
+
+        import anyio
+
+        with trace_operation(
+            "vector_viz.pca_compute",
+            attributes={
+                "pca.num_vectors": len(all_vectors_normalized),
+                "pca.embedding_dim": embedding_dim,
+            },
+        ):
+            coords_3d, pca = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+                lambda: _compute_pca(all_vectors_normalized)
+            )
+        pca_duration = time.perf_counter() - pca_start
+
+        # After fit, these attributes are guaranteed to be set
+        assert pca.explained_variance_ratio_ is not None
+
+        # Check for NaN values in PCA output (numerical instability)
+        nan_mask = np.isnan(coords_3d)
+        if nan_mask.any():
+            nan_rows = np.where(nan_mask.any(axis=1))[0]
+            logger.error(
+                f"Found NaN values in PCA output at {len(nan_rows)} points: {nan_rows.tolist()[:10]}. "
+                "Replacing NaN with 0.0 to prevent JSON serialization error."
+            )
+            # Replace NaN with 0 to allow JSON serialization
+            coords_3d = np.nan_to_num(coords_3d, nan=0.0)
+
+        # Split query coords from chunk coords
+        # Round to 2 decimal places for cleaner display
+        query_coords_3d = [
+            round(float(x), 2) for x in coords_3d[-1]
+        ]  # Last point is query
+        chunk_coords_3d = coords_3d[:-1]  # All but last are chunks
+
+        logger.info(
+            f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, "
+            f"PC2={pca.explained_variance_ratio_[1]:.3f}, "
+            f"PC3={pca.explained_variance_ratio_[2]:.3f}"
+        )
+        logger.info(
+            f"Embedding stats: chunks={len(chunk_vectors)}, "
+            f"query_dim={len(query_embedding)}, chunk_vector_dim={chunk_vectors.shape[1] if chunk_vectors.size > 0 else 0}"
+        )
+
+        # Coordinates already match search_results order (1:1 mapping)
+        result_coords = [
+            [round(float(x), 2) for x in coord] for coord in chunk_coords_3d
+        ]
+
+        # Build response
+        response_results = [
+            {
+                "id": r.id,
+                "doc_type": r.doc_type,
+                "title": r.title,
+                "excerpt": r.excerpt,
+                "score": r.score,  # Normalized score for visual encoding (0-1)
+                "original_score": getattr(
+                    r, "original_score", r.score
+                ),  # Raw score from algorithm
+                "chunk_start_offset": r.chunk_start_offset,
+                "chunk_end_offset": r.chunk_end_offset,
+            }
+            for r in search_results
+        ]
+
+        # Calculate total request duration
+        total_duration = time.perf_counter() - request_start
+
+        # Log comprehensive timing metrics
+        logger.info(
+            f"Viz search timing: total={total_duration * 1000:.1f}ms, "
+            f"search={search_duration * 1000:.1f}ms ({search_duration / total_duration * 100:.1f}%), "
+            f"vector_fetch={vector_fetch_duration * 1000:.1f}ms ({vector_fetch_duration / total_duration * 100:.1f}%), "
+            f"query_embed={query_embed_duration * 1000:.1f}ms ({query_embed_duration / total_duration * 100:.1f}%), "
+            f"pca={pca_duration * 1000:.1f}ms ({pca_duration / total_duration * 100:.1f}%), "
+            f"results={len(search_results)}, chunk_vectors={len(chunk_vectors)}"
+        )
+
+        return JSONResponse(
+            {
+                "success": True,
+                "results": response_results,
+                "coordinates_3d": result_coords[: len(search_results)],
+                "query_coords": query_coords_3d,
+                "pca_variance": {
+                    "pc1": float(pca.explained_variance_ratio_[0]),
+                    "pc2": float(pca.explained_variance_ratio_[1]),
+                    "pc3": float(pca.explained_variance_ratio_[2]),
+                },
+                "timing": {
+                    "total_ms": round(total_duration * 1000, 2),
+                    "search_ms": round(search_duration * 1000, 2),
+                    "vector_fetch_ms": round(vector_fetch_duration * 1000, 2),
+                    "query_embed_ms": round(query_embed_duration * 1000, 2),
+                    "pca_ms": round(pca_duration * 1000, 2),
+                    "num_results": len(search_results),
+                    "num_chunk_vectors": len(chunk_vectors),
+                },
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Viz search error: {e}", exc_info=True)
+        return JSONResponse(
+            {"success": False, "error": str(e)},
+            status_code=500,
+        )
+
+
+@requires("authenticated", redirect="oauth_login")
+async def chunk_context_endpoint(request: Request) -> JSONResponse:
+    """Fetch chunk text with surrounding context for visualization.
+
+    This endpoint retrieves the matched chunk along with surrounding text
+    to provide context for the search result. Used by the viz pane to
+    display chunks inline.
+
+    Query parameters:
+        doc_type: Document type (e.g., "note")
+        doc_id: Document ID (integer)
+        start: Chunk start offset (character position, must be >= 0)
+        end: Chunk end offset (character position, must be >= start)
+        context: Characters of context before/after (default: 500, must be >= 0)
+
+    Returns:
+        JSON with chunk_text, before_context, after_context and the
+        has_more_before / has_more_after flags. When doc_type is "file" and a
+        highlighted image is stored for the chunk, the response also carries
+        highlighted_page_image and page_number.
+
+        Failures are reported as {"success": False, "error": ...} with status
+        400 (missing or invalid parameters), 404 (chunk context could not be
+        fetched) or 500 (unexpected error).
+    """
+    try:
+        # Get query parameters
+        doc_type = request.query_params.get("doc_type")
+        doc_id = request.query_params.get("doc_id")
+        start_str = request.query_params.get("start")
+        end_str = request.query_params.get("end")
+        context_chars = int(request.query_params.get("context", "500"))
+
+        # Validate required parameters
+        if not all([doc_type, doc_id, start_str, end_str]):
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "Missing required parameters: doc_type, doc_id, start, end",
+                },
+                status_code=400,
+            )
+
+        # Type assertions - we validated these above
+        assert doc_type is not None
+        assert doc_id is not None
+        assert start_str is not None
+        assert end_str is not None
+
+        start = int(start_str)
+        end = int(end_str)
+        # Convert doc_id to int (all document types use int IDs)
+        doc_id_int = int(doc_id)
+
+        # Reject values that cannot describe a chunk. Raising ValueError routes
+        # them to the 400 handler below instead of handing negative or inverted
+        # ranges to the context lookup.
+        if start < 0 or end < start:
+            raise ValueError(f"invalid chunk range start={start}, end={end}")
+        if context_chars < 0:
+            raise ValueError(f"context must be non-negative, got {context_chars}")
+
+        # Get authenticated Nextcloud client
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            _get_authenticated_client_for_userinfo,
+        )
+        from nextcloud_mcp_server.search.context import get_chunk_with_context
+
+        # Use context expansion module to fetch chunk with surrounding context
+        async with await _get_authenticated_client_for_userinfo(request) as nc_client:
+            chunk_context = await get_chunk_with_context(
+                nc_client=nc_client,
+                user_id=request.user.display_name,  # User ID from auth
+                doc_id=doc_id_int,
+                doc_type=doc_type,
+                chunk_start=start,
+                chunk_end=end,
+                context_chars=context_chars,
+            )
+
+        # Check if context expansion succeeded
+        if chunk_context is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": f"Failed to fetch chunk context for {doc_type} {doc_id}",
+                },
+                status_code=404,
+            )
+
+        logger.info(
+            f"Fetched chunk context for {doc_type}_{doc_id}: "
+            f"chunk_len={len(chunk_context.chunk_text)}, "
+            f"before_len={len(chunk_context.before_context)}, "
+            f"after_len={len(chunk_context.after_context)}"
+        )
+
+        # For PDF files, also fetch the highlighted page image from Qdrant.
+        # This lookup is best effort: a failure is logged and the text-only
+        # response is still returned.
+        highlighted_page_image = None
+        page_number = None
+        if doc_type == "file":
+            try:
+                from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+                settings = get_settings()
+                qdrant_client = await get_qdrant_client()
+                username = request.user.display_name
+
+                # Query for this specific chunk's highlighted image
+                points_response = await qdrant_client.scroll(
+                    collection_name=settings.get_collection_name(),
+                    scroll_filter=Filter(
+                        must=[
+                            get_placeholder_filter(),
+                            FieldCondition(
+                                key="doc_id", match=MatchValue(value=doc_id_int)
+                            ),
+                            FieldCondition(
+                                key="user_id", match=MatchValue(value=username)
+                            ),
+                            FieldCondition(
+                                key="chunk_start_offset", match=MatchValue(value=start)
+                            ),
+                            FieldCondition(
+                                key="chunk_end_offset", match=MatchValue(value=end)
+                            ),
+                        ]
+                    ),
+                    limit=1,
+                    with_vectors=False,
+                    with_payload=["highlighted_page_image", "page_number"],
+                )
+
+                # scroll() result is indexed here as (points, ...); only the
+                # first element is used.
+                points = points_response[0]
+                if points and points[0].payload:
+                    highlighted_page_image = points[0].payload.get(
+                        "highlighted_page_image"
+                    )
+                    page_number = points[0].payload.get("page_number")
+                    if highlighted_page_image:
+                        logger.info(
+                            f"Found highlighted image for chunk: "
+                            f"page={page_number}, image_size={len(highlighted_page_image)}"
+                        )
+            except Exception as e:
+                logger.warning(f"Failed to fetch highlighted image: {e}")
+
+        # Return response compatible with frontend expectations
+        response_data: dict = {
+            "success": True,
+            "chunk_text": chunk_context.chunk_text,
+            "before_context": chunk_context.before_context,
+            "after_context": chunk_context.after_context,
+            "has_more_before": chunk_context.has_before_truncation,
+            "has_more_after": chunk_context.has_after_truncation,
+        }
+
+        # Add image data if available
+        if highlighted_page_image:
+            response_data["highlighted_page_image"] = highlighted_page_image
+            response_data["page_number"] = page_number
+
+        return JSONResponse(response_data)
+
+    except ValueError as e:
+        # Covers non-integer parameters as well as the range checks above.
+        logger.error(f"Invalid parameter format: {e}")
+        return JSONResponse(
+            {"success": False, "error": f"Invalid parameter format: {e}"},
+            status_code=400,
+        )
+    except Exception as e:
+        logger.error(f"Chunk context error: {e}", exc_info=True)
+        return JSONResponse(
+            {"success": False, "error": str(e)},
+            status_code=500,
+        )
@@ -29,9 +29,9 @@ from .app import get_app
@click.option(
    "--transport",
    "-t",
-    default="sse",
+    default="streamable-http",
    show_default=True,
-    type=click.Choice(["sse", "streamable-http", "http"]),
+    type=click.Choice(["streamable-http", "http"]),
    help="MCP transport protocol",
 )
@click.option(
@@ -130,10 +130,75 @@ class NextcloudClient:
        all_notes = self.notes.get_all_notes()
        return await self._notes_search.search_notes(all_notes, query)

+    async def find_files_by_tag(
+        self, tag_name: str, mime_type_filter: str | None = None
+    ) -> list[dict]:
+        """Find files by system tag name, optionally filtered by MIME type.
+
+        This method coordinates tag lookup and file retrieval via WebDAV:
+        1. Look up the tag ID by name
+        2. Get all files with that tag (via REPORT with full metadata)
+        3. Optionally filter by MIME type
+
+        Args:
+            tag_name: Name of the system tag to search for (e.g., "vector-index")
+            mime_type_filter: Optional MIME type filter (e.g., "application/pdf").
+                Matching is by prefix, so "image/" selects every image type.
+                Entries without a content type never match a filter.
+
+        Returns:
+            List of file dictionaries with WebDAV properties (path, size, content_type, etc.).
+            Empty when the tag does not exist or no file carries it.
+
+        Raises:
+            RuntimeError: If tag lookup or file query fails
+
+        Examples:
+            # Find all files with "vector-index" tag
+            files = await nc_client.find_files_by_tag("vector-index")
+
+            # Find only PDFs with the tag
+            pdfs = await nc_client.find_files_by_tag("vector-index", "application/pdf")
+        """
+        # Look up tag by name using WebDAV
+        tag = await self.webdav.get_tag_by_name(tag_name)
+        if not tag:
+            logger.debug(f"Tag '{tag_name}' not found, returning empty list")
+            return []
+
+        # Get files with this tag (returns full file info from REPORT)
+        files = await self.webdav.get_files_by_tag(tag["id"])
+        if not files:
+            logger.debug(f"No files found with tag '{tag_name}'")
+            return []
+
+        logger.debug(f"Found {len(files)} files with tag '{tag_name}'")
+
+        # Apply MIME type filter if specified
+        if mime_type_filter:
+            # `or ""` rather than a .get() default: the key may be present with
+            # a None value (an empty <d:getcontenttype/> element), and calling
+            # .startswith on None would raise AttributeError.
+            filtered_files = [
+                f
+                for f in files
+                if (f.get("content_type") or "").startswith(mime_type_filter)
+            ]
+            logger.info(
+                f"Returning {len(filtered_files)} files with tag '{tag_name}' (filtered by {mime_type_filter})"
+            )
+            return filtered_files
+
+        logger.info(f"Returning {len(files)} files with tag '{tag_name}'")
+        return files
+
    def _get_webdav_base_path(self) -> str:
        """Helper to get the base WebDAV path for the authenticated user."""
        return f"/remote.php/dav/files/{self.username}"

+    async def __aenter__(self):
+        """Enter the async context manager.
+
+        Returns:
+            This client instance, so it can be bound with ``async with ... as``.
+        """
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Exit the async context manager by awaiting ``self.close()``.
+
+        Args:
+            exc_type: Exception class raised inside the ``async with`` body, if any.
+            exc_val: Exception instance raised inside the body, if any.
+            exc_tb: Traceback associated with that exception, if any.
+
+        Returns:
+            False, so an exception raised inside the body is not suppressed.
+        """
+        await self.close()
+        return False  # Don't suppress exceptions
+
    async def close(self):
        """Close the HTTP client and CalDAV client."""
        await self._client.aclose()
@@ -5,6 +5,7 @@ import time
 from abc import ABC
 from functools import wraps

+import anyio
 from httpx import AsyncClient, HTTPStatusError, RequestError, codes

 from nextcloud_mcp_server.observability.metrics import (
@@ -47,7 +48,7 @@ def retry_on_429(func):
                    # Record retry metric (extract app name from args if available)
                    if len(args) > 0 and hasattr(args[0], "app_name"):
                        record_nextcloud_api_retry(app=args[0].app_name, reason="429")
-                    time.sleep(5)
+                    await anyio.sleep(5)
                elif e.response.status_code == 404:
                    # 404 errors are often expected (e.g., checking if attachments exist)
                    # Log as debug instead of warning
@@ -40,7 +40,7 @@ class NotesClient(BaseNextcloudClient):
        seen_ids: set[int] = set()

        while True:
-            params: Dict[str, Any] = {"chunkSize": 10}
+            params: Dict[str, Any] = {"chunkSize": 100}
            if cursor:
                params["chunkCursor"] = cursor
            if prune_before is not None:
@@ -821,6 +821,20 @@ class WebDAVClient(BaseNextcloudClient):
                    item["file_id"] = int(value) if value else None
                elif tag == "favorite":
                    item["is_favorite"] = value == "1"
+                elif tag == "tags":
+                    # Tags can be comma-separated or have multiple child elements
+                    if value:
+                        # Handle comma-separated tags
+                        item["tags"] = [
+                            t.strip() for t in value.split(",") if t.strip()
+                        ]
+                    else:
+                        # Check for child tag elements (alternative format)
+                        tag_elements = child.findall(".//{http://owncloud.org/ns}tag")
+                        if tag_elements:
+                            item["tags"] = [t.text for t in tag_elements if t.text]
+                        else:
+                            item["tags"] = []
                elif tag == "permissions":
                    item["permissions"] = value
                elif tag == "size":
@@ -948,3 +962,570 @@ class WebDAVClient(BaseNextcloudClient):
            properties=properties,
            limit=limit,
        )
+
+    async def find_by_tag(
+        self, tag_name: str, scope: str = "", limit: Optional[int] = None
+    ) -> List[Dict[str, Any]]:
+        """Find files by tag name.
+
+        DEPRECATED: Use NextcloudClient.find_files_by_tag() instead, which
+        resolves the system tag by name and lists its files with a WebDAV
+        REPORT rather than a WebDAV SEARCH.
+
+        The match is a LIKE on "%<tag_name>%", i.e. a substring match against
+        the oc:tags property, so "index" also matches "vector-index".
+        NOTE(review): "%" and "_" inside tag_name presumably act as LIKE
+        wildcards on the server - confirm before relying on exact matching.
+
+        Args:
+            tag_name: Tag to filter by (e.g., "vector-index")
+            scope: Directory path to search in (empty string for user root)
+            limit: Maximum number of results to return
+
+        Returns:
+            List of files/directories with the specified tag
+
+        Examples:
+            # Find all files tagged with "vector-index"
+            results = await find_by_tag("vector-index")
+
+            # Find tagged files in a specific folder
+            results = await find_by_tag("vector-index", scope="Documents")
+        """
+        from xml.sax.saxutils import escape
+
+        # Escape XML metacharacters so a tag such as "R&D" or "a<b" cannot
+        # produce malformed XML or inject elements into the SEARCH body.
+        safe_tag_name = escape(tag_name)
+
+        # Use LIKE for tag matching since tags can be comma-separated
+        where_conditions = f"""
+            <d:like>
+                <d:prop>
+                    <oc:tags/>
+                </d:prop>
+                <d:literal>%{safe_tag_name}%</d:literal>
+            </d:like>
+        """
+
+        # Request tag property along with standard properties
+        properties = [
+            "displayname",
+            "getcontentlength",
+            "getcontenttype",
+            "getlastmodified",
+            "resourcetype",
+            "getetag",
+            "fileid",
+            "tags",
+        ]
+
+        return await self.search_files(
+            scope=scope,
+            where_conditions=where_conditions,
+            properties=properties,
+            limit=limit,
+        )
+
+    async def _get_file_info_by_id(self, file_id: int) -> Dict[str, Any]:
+        """Get file information by Nextcloud file ID using WebDAV.
+
+        Issues a Depth-0 PROPFIND against /remote.php/dav/meta/<file_id>/ and
+        reads the properties from the first response element.
+
+        Args:
+            file_id: Nextcloud internal file ID
+
+        Returns:
+            Dictionary with name, path, size, content_type, last_modified,
+            etag (surrounding quotes removed), is_directory and file_id.
+            "path" is only a placeholder built from the name ("/<name>"), not
+            the file's real location.
+
+        Raises:
+            HTTPStatusError: If file not found or request fails
+            RuntimeError: If the response has no response/href/propstat/prop
+                element for the file
+        """
+        # Nextcloud allows accessing files by ID via special meta endpoint
+        meta_path = f"/remote.php/dav/meta/{file_id}/"
+
+        propfind_body = """<?xml version="1.0"?>
+        <d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
+            <d:prop>
+                <d:displayname/>
+                <d:getcontentlength/>
+                <d:getcontenttype/>
+                <d:getlastmodified/>
+                <d:resourcetype/>
+                <d:getetag/>
+                <oc:fileid/>
+            </d:prop>
+        </d:propfind>"""
+
+        headers = {"Depth": "0", "Content-Type": "text/xml", "OCS-APIRequest": "true"}
+
+        response = await self._make_request(
+            "PROPFIND", meta_path, content=propfind_body, headers=headers
+        )
+        response.raise_for_status()
+
+        # Parse the XML response
+        root = ET.fromstring(response.content)
+        responses = root.findall(".//{DAV:}response")
+
+        if not responses:
+            raise RuntimeError(f"File ID {file_id} not found")
+
+        response_elem = responses[0]
+        href = response_elem.find(".//{DAV:}href")
+        if href is None:
+            raise RuntimeError(f"No href in response for file ID {file_id}")
+
+        propstat = response_elem.find(".//{DAV:}propstat")
+        if propstat is None:
+            raise RuntimeError(f"No propstat for file ID {file_id}")
+
+        prop = propstat.find(".//{DAV:}prop")
+        if prop is None:
+            raise RuntimeError(f"No prop for file ID {file_id}")
+
+        # Extract the name from displayname. Fall back to a synthetic name when
+        # the element is missing *or* empty: an empty element has text None,
+        # which would otherwise produce the name None and the path "/None".
+        displayname_elem = prop.find(".//{DAV:}displayname")
+        name = (
+            displayname_elem.text
+            if displayname_elem is not None and displayname_elem.text
+            else f"file_{file_id}"
+        )
+
+        # Get file properties
+        size_elem = prop.find(".//{DAV:}getcontentlength")
+        size = int(size_elem.text) if size_elem is not None and size_elem.text else 0
+
+        content_type_elem = prop.find(".//{DAV:}getcontenttype")
+        content_type = content_type_elem.text if content_type_elem is not None else None
+
+        modified_elem = prop.find(".//{DAV:}getlastmodified")
+        modified = modified_elem.text if modified_elem is not None else None
+
+        etag_elem = prop.find(".//{DAV:}getetag")
+        etag = (
+            etag_elem.text.strip('"')
+            if etag_elem is not None and etag_elem.text
+            else None
+        )
+
+        # Check if it's a directory
+        resourcetype = prop.find(".//{DAV:}resourcetype")
+        is_directory = (
+            resourcetype is not None
+            and resourcetype.find(".//{DAV:}collection") is not None
+        )
+
+        # Try to get actual file path - meta endpoint doesn't give us the real path
+        # so we'll construct a reasonable path from the name
+        # The calling code in NextcloudClient will have the context to determine the actual path
+        file_info = {
+            "name": name,
+            "path": f"/{name}",  # Placeholder - caller should use WebDAV to get real path if needed
+            "size": size,
+            "content_type": content_type,
+            "last_modified": modified,
+            "etag": etag,
+            "is_directory": is_directory,
+            "file_id": file_id,
+        }
+
+        logger.debug(f"Retrieved file info for ID {file_id}: {name}")
+        return file_info
+
+    async def get_tag_by_name(self, tag_name: str) -> dict[str, Any] | None:
+        """Get a system tag by its name via WebDAV.
+
+        Lists /remote.php/dav/systemtags/ with a Depth-1 PROPFIND and returns
+        the first entry whose display name equals tag_name.
+
+        Args:
+            tag_name: Name of the tag to find (case-sensitive)
+
+        Returns:
+            Tag dictionary with keys id, name, userVisible and userAssignable
+            if found, None otherwise. id is None when the server sends no
+            value for it; the two flags default to True when absent or empty.
+
+        Raises:
+            HTTPStatusError: If the PROPFIND request returns an error status
+        """
+
+        def _as_bool(elem, default: bool = True) -> bool:
+            """Read a "true"/"false" property; use default when absent or empty."""
+            if elem is None or not elem.text:
+                return default
+            return elem.text.strip().lower() == "true"
+
+        # Use WebDAV PROPFIND to list all systemtags
+        propfind_body = """<?xml version="1.0"?>
+<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
+  <d:prop>
+    <oc:id/>
+    <oc:display-name/>
+    <oc:user-visible/>
+    <oc:user-assignable/>
+  </d:prop>
+</d:propfind>"""
+
+        response = await self._client.request(
+            "PROPFIND",
+            "/remote.php/dav/systemtags/",
+            headers={"Depth": "1"},
+            content=propfind_body,
+        )
+        response.raise_for_status()
+
+        # Parse XML response
+        root = ET.fromstring(response.content)
+        ns = {
+            "d": "DAV:",
+            "oc": "http://owncloud.org/ns",
+        }
+
+        for response_elem in root.findall("d:response", ns):
+            href = response_elem.find("d:href", ns)
+            # Skip the collection itself. Comparing the path suffix (instead of
+            # the whole href) also covers servers installed under a URL prefix.
+            if href is None or (href.text or "").rstrip("/").endswith(
+                "/remote.php/dav/systemtags"
+            ):
+                continue
+
+            propstat = response_elem.find("d:propstat", ns)
+            if propstat is None:
+                continue
+
+            prop = propstat.find("d:prop", ns)
+            if prop is None:
+                continue
+
+            # Extract tag properties
+            tag_id_elem = prop.find("oc:id", ns)
+            display_name_elem = prop.find("oc:display-name", ns)
+            user_visible_elem = prop.find("oc:user-visible", ns)
+            user_assignable_elem = prop.find("oc:user-assignable", ns)
+
+            if display_name_elem is not None and display_name_elem.text == tag_name:
+                # An element can be present but empty (text is None); treat
+                # that like a missing element instead of crashing in int().
+                has_id = tag_id_elem is not None and tag_id_elem.text
+                tag_info = {
+                    "id": int(tag_id_elem.text) if has_id else None,
+                    "name": display_name_elem.text,
+                    "userVisible": _as_bool(user_visible_elem),
+                    "userAssignable": _as_bool(user_assignable_elem),
+                }
+                logger.debug(f"Found tag '{tag_name}' with ID {tag_info['id']}")
+                return tag_info
+
+        logger.debug(f"Tag '{tag_name}' not found")
+        return None
+
+    async def get_files_by_tag(self, tag_id: int) -> list[dict[str, Any]]:
+        """Get all files tagged with a specific system tag via WebDAV REPORT.
+
+        Args:
+            tag_id: Numeric ID of the tag
+
+        Returns:
+            List of file info dictionaries with keys id, path (relative to the
+            user's files root), name, size, content_type, last_modified,
+            last_modified_timestamp and etag. content_type is always a string
+            ("" when the server reports none). Responses lacking an href, a
+            propstat/prop element or a file ID are skipped.
+
+        Raises:
+            HTTPStatusError: If the REPORT request returns an error status
+        """
+        from email.utils import parsedate_to_datetime
+        from urllib.parse import unquote
+
+        # Use WebDAV REPORT method with systemtag filter, requesting all properties
+        report_body = f"""<?xml version="1.0"?>
+<oc:filter-files xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns" xmlns:nc="http://nextcloud.org/ns">
+  <d:prop>
+    <oc:fileid/>
+    <d:displayname/>
+    <d:getcontentlength/>
+    <d:getcontenttype/>
+    <d:getlastmodified/>
+    <d:getetag/>
+  </d:prop>
+  <oc:filter-rules>
+    <oc:systemtag>{tag_id}</oc:systemtag>
+  </oc:filter-rules>
+</oc:filter-files>"""
+
+        response = await self._client.request(
+            "REPORT",
+            f"{self._get_webdav_base_path()}/",
+            content=report_body,
+        )
+        response.raise_for_status()
+
+        # Parse XML response
+        root = ET.fromstring(response.content)
+        ns = {
+            "d": "DAV:",
+            "oc": "http://owncloud.org/ns",
+        }
+
+        # Prefix that separates the server part of an href from the
+        # user-relative file path; identical for every response.
+        webdav_prefix = f"/remote.php/dav/files/{self.username}/"
+
+        files = []
+        for response_elem in root.findall("d:response", ns):
+            # Extract href (file path)
+            href_elem = response_elem.find("d:href", ns)
+            if href_elem is None or not href_elem.text:
+                continue
+
+            propstat = response_elem.find("d:propstat", ns)
+            if propstat is None:
+                continue
+
+            prop = propstat.find("d:prop", ns)
+            if prop is None:
+                continue
+
+            # Extract all properties
+            fileid_elem = prop.find("oc:fileid", ns)
+            displayname_elem = prop.find("d:displayname", ns)
+            contentlength_elem = prop.find("d:getcontentlength", ns)
+            contenttype_elem = prop.find("d:getcontenttype", ns)
+            lastmodified_elem = prop.find("d:getlastmodified", ns)
+            etag_elem = prop.find("d:getetag", ns)
+
+            if fileid_elem is None or not fileid_elem.text:
+                continue
+
+            # Decode href path and cut everything up to and including the first
+            # WebDAV prefix. Slicing (rather than str.replace) keeps a URL
+            # prefix of the server out of the result and leaves any later
+            # occurrence of the same text inside the file path untouched.
+            href_path = unquote(href_elem.text)
+            prefix_at = href_path.find(webdav_prefix)
+            if prefix_at != -1:
+                file_path = "/" + href_path[prefix_at + len(webdav_prefix) :]
+            else:
+                file_path = href_path
+
+            # Parse last modified timestamp
+            last_modified_timestamp = None
+            if lastmodified_elem is not None and lastmodified_elem.text:
+                try:
+                    parsed = parsedate_to_datetime(lastmodified_elem.text)
+                    last_modified_timestamp = int(parsed.timestamp())
+                except Exception as e:
+                    # Best effort: the raw header value is still returned, the
+                    # numeric timestamp just stays unset.
+                    logger.debug(
+                        f"Could not parse getlastmodified "
+                        f"{lastmodified_elem.text!r}: {e}"
+                    )
+
+            # Elements may be present but empty (text is None); fall back in
+            # that case exactly as if the element were missing.
+            if displayname_elem is not None and displayname_elem.text:
+                name = displayname_elem.text
+            else:
+                name = file_path.rstrip("/").split("/")[-1]
+
+            if contenttype_elem is not None and contenttype_elem.text:
+                content_type = contenttype_elem.text
+            else:
+                content_type = ""
+
+            file_info = {
+                "id": int(fileid_elem.text),
+                "path": file_path,
+                "name": name,
+                "size": int(contentlength_elem.text)
+                if contentlength_elem is not None and contentlength_elem.text
+                else 0,
+                "content_type": content_type,
+                "last_modified": lastmodified_elem.text
+                if lastmodified_elem is not None
+                else None,
+                "last_modified_timestamp": last_modified_timestamp,
+                "etag": etag_elem.text if etag_elem is not None else None,
+            }
+            files.append(file_info)
+
+        logger.debug(f"Found {len(files)} files with tag ID {tag_id}")
+        return files
+
+    async def get_file_info(self, path: str) -> dict[str, Any] | None:
+        """Get file info including file ID via a Depth-0 WebDAV PROPFIND.
+
+        Args:
+            path: Path to the file (relative to user's files directory)
+
+        Returns:
+            Dictionary with the keys ``id``, ``path``, ``name``, ``size``,
+            ``content_type``, ``last_modified``, ``etag`` and ``is_directory``.
+            ``id`` is None when the server sends no (or an empty) file ID.
+            Returns None if the server answers 404 or the multistatus body
+            has no response/propstat/prop element.
+
+        Raises:
+            HTTPStatusError: For any error status other than 404.
+        """
+        webdav_path = f"{self._get_webdav_base_path()}/{path.lstrip('/')}"
+
+        propfind_body = """<?xml version="1.0"?>
+<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
+  <d:prop>
+    <oc:fileid/>
+    <d:displayname/>
+    <d:getcontentlength/>
+    <d:getcontenttype/>
+    <d:getlastmodified/>
+    <d:getetag/>
+    <d:resourcetype/>
+  </d:prop>
+</d:propfind>"""
+
+        try:
+            response = await self._client.request(
+                "PROPFIND",
+                webdav_path,
+                headers={"Depth": "0"},
+                content=propfind_body,
+            )
+            response.raise_for_status()
+        except HTTPStatusError as e:
+            if e.response.status_code == 404:
+                logger.debug(f"File not found: {path}")
+                return None
+            raise
+
+        # Parse XML response
+        root = ET.fromstring(response.content)
+        ns = {
+            "d": "DAV:",
+            "oc": "http://owncloud.org/ns",
+        }
+
+        response_elem = root.find("d:response", ns)
+        if response_elem is None:
+            return None
+
+        propstat = response_elem.find("d:propstat", ns)
+        if propstat is None:
+            return None
+
+        prop = propstat.find("d:prop", ns)
+        if prop is None:
+            return None
+
+        # Extract properties
+        fileid_elem = prop.find("oc:fileid", ns)
+        displayname_elem = prop.find("d:displayname", ns)
+        contentlength_elem = prop.find("d:getcontentlength", ns)
+        contenttype_elem = prop.find("d:getcontenttype", ns)
+        lastmodified_elem = prop.find("d:getlastmodified", ns)
+        etag_elem = prop.find("d:getetag", ns)
+        resourcetype_elem = prop.find("d:resourcetype", ns)
+
+        is_directory = (
+            resourcetype_elem is not None
+            and resourcetype_elem.find("d:collection", ns) is not None
+        )
+
+        # An element can be present but empty (text is None); int(None) would
+        # raise TypeError, so only convert when there is actual text.
+        fileid_text = fileid_elem.text if fileid_elem is not None else None
+        display_name = displayname_elem.text if displayname_elem is not None else None
+
+        file_info = {
+            "id": int(fileid_text) if fileid_text else None,
+            "path": path,
+            # Fall back to the last path segment when the server gives no
+            # display name; strip a trailing slash so "dir/" yields "dir".
+            "name": display_name or path.rstrip("/").split("/")[-1],
+            "size": int(contentlength_elem.text)
+            if contentlength_elem is not None and contentlength_elem.text
+            else 0,
+            "content_type": contenttype_elem.text
+            if contenttype_elem is not None
+            else "",
+            "last_modified": lastmodified_elem.text
+            if lastmodified_elem is not None
+            else None,
+            # ETag values arrive wrapped in double quotes; expose the bare value.
+            "etag": etag_elem.text.strip('"')
+            if etag_elem is not None and etag_elem.text
+            else None,
+            "is_directory": is_directory,
+        }
+
+        logger.debug(f"Got file info for '{path}': id={file_info['id']}")
+        return file_info
+
+    async def create_tag(
+        self,
+        name: str,
+        user_visible: bool = True,
+        user_assignable: bool = True,
+    ) -> dict[str, Any]:
+        """Create a system tag via WebDAV.
+
+        Args:
+            name: Name of the tag to create
+            user_visible: Whether the tag is visible to users
+            user_assignable: Whether users can assign this tag
+
+        Returns:
+            Tag dictionary with id, name, userVisible, userAssignable.
+            ``id`` is None when the response carries no parsable
+            Content-Location header; a warning is logged in that case.
+
+        Raises:
+            HTTPStatusError: If tag creation fails (409 if already exists)
+        """
+        # Use WebDAV POST with JSON body to create tag
+        response = await self._client.post(
+            "/remote.php/dav/systemtags/",
+            headers={"Content-Type": "application/json"},
+            json={
+                "name": name,
+                "userVisible": user_visible,
+                "userAssignable": user_assignable,
+            },
+        )
+        response.raise_for_status()
+
+        # Extract tag ID from Content-Location header (e.g., /remote.php/dav/systemtags/42)
+        content_location = response.headers.get("Content-Location", "")
+        tag_id: int | None = None
+        if content_location:
+            # The numeric ID is the last path segment. str.split always yields
+            # at least one element, so only ValueError can occur here.
+            try:
+                tag_id = int(content_location.rstrip("/").split("/")[-1])
+            except ValueError:
+                logger.warning(
+                    f"Tag '{name}' was created but its ID could not be parsed "
+                    f"from Content-Location {content_location!r}"
+                )
+        else:
+            logger.warning(
+                f"Tag '{name}' was created but the response has no "
+                f"Content-Location header; tag ID is unknown"
+            )
+
+        tag_info = {
+            "id": tag_id,
+            "name": name,
+            "userVisible": user_visible,
+            "userAssignable": user_assignable,
+        }
+
+        logger.info(f"Created tag '{name}' with ID {tag_info['id']}")
+        return tag_info
+
+    async def get_or_create_tag(
+        self,
+        name: str,
+        user_visible: bool = True,
+        user_assignable: bool = True,
+    ) -> dict[str, Any]:
+        """Return the tag called ``name``, creating it when it is absent.
+
+        Args:
+            name: Name of the tag
+            user_visible: Visibility flag, only used if the tag is created
+            user_assignable: Assignability flag, only used if the tag is created
+
+        Returns:
+            Tag dictionary with id, name, userVisible, userAssignable
+
+        Raises:
+            HTTPStatusError: If creation fails, including a 409 for which the
+                follow-up lookup still finds no tag.
+        """
+        found = await self.get_tag_by_name(name)
+        if found:
+            logger.debug(f"Tag '{name}' already exists with ID {found['id']}")
+            return found
+
+        try:
+            created = await self.create_tag(name, user_visible, user_assignable)
+        except HTTPStatusError as e:
+            if e.response.status_code != 409:
+                raise
+            # 409: the tag appeared between the lookup and the create call,
+            # so look it up once more before giving up.
+            found = await self.get_tag_by_name(name)
+            if not found:
+                raise
+            return found
+        return created
+
+    async def assign_tag_to_file(self, file_id: int, tag_id: int) -> bool:
+        """Attach a system tag to a file with an empty-body PUT.
+
+        Args:
+            file_id: Numeric file ID
+            tag_id: Numeric tag ID
+
+        Returns:
+            True when the request succeeds; a 409 response is treated as
+            "already assigned" and also yields True.
+
+        Raises:
+            HTTPStatusError: For error statuses other than 409
+        """
+        relation_url = f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}"
+        result = await self._client.request(
+            "PUT",
+            relation_url,
+            headers={"Content-Length": "0"},
+            content=b"",
+        )
+
+        # Anything other than 201 (created) / 409 (already assigned) is left to
+        # raise_for_status; other success codes fall through and return True.
+        if result.status_code not in (201, 409):
+            result.raise_for_status()
+            return True
+
+        logger.info(f"Tagged file {file_id} with tag {tag_id}")
+        return True
+
+    async def remove_tag_from_file(self, file_id: int, tag_id: int) -> bool:
+        """Detach a system tag from a file with a DELETE request.
+
+        Args:
+            file_id: Numeric file ID
+            tag_id: Numeric tag ID
+
+        Returns:
+            True when the request succeeds; a 404 response is treated as
+            "was not assigned" and also yields True.
+
+        Raises:
+            HTTPStatusError: For error statuses other than 404
+        """
+        relation_url = f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}"
+        result = await self._client.request("DELETE", relation_url)
+
+        # Anything other than 204 (removed) / 404 (not assigned) is left to
+        # raise_for_status; other success codes fall through and return True.
+        if result.status_code not in (204, 404):
+            result.raise_for_status()
+            return True
+
+        logger.info(f"Removed tag {tag_id} from file {file_id}")
+        return True
@@ -2,8 +2,37 @@ import logging
 import logging.config
 import os
 from dataclasses import dataclass
+from enum import Enum
 from typing import Any, Optional

+
+class DeploymentMode(Enum):
+    """How the MCP server is deployed.
+
+    Members:
+        SELF_HOSTED: Configuration comes from the environment and the full
+            feature set is available (vector sync, semantic search, admin UI).
+        SMITHERY_STATELESS: Hosted on Smithery. Configuration is supplied per
+            session and nothing is stored persistently; semantic search,
+            vector sync and the admin UI are excluded.
+    """
+
+    # Member values are plain strings; their order is part of iteration order.
+    SELF_HOSTED = "self_hosted"
+    SMITHERY_STATELESS = "smithery"
+
+
+def get_deployment_mode() -> DeploymentMode:
+    """Resolve the deployment mode from the SMITHERY_DEPLOYMENT variable.
+
+    The variable is compared case-insensitively against "true"; when it is
+    unset it is treated as "false".
+
+    Returns:
+        DeploymentMode.SMITHERY_STATELESS when the flag equals "true",
+        DeploymentMode.SELF_HOSTED for every other value.
+    """
+    flag = os.getenv("SMITHERY_DEPLOYMENT", "false")
+    smithery_enabled = flag.lower() == "true"
+    return (
+        DeploymentMode.SMITHERY_STATELESS
+        if smithery_enabled
+        else DeploymentMode.SELF_HOSTED
+    )
+
+
 LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,
@@ -102,6 +131,14 @@ def get_document_processor_config() -> dict[str, Any]:
            "lang": os.getenv("TESSERACT_LANG", "eng"),
        }

+    # PyMuPDF configuration (local PDF processing)
+    if os.getenv("ENABLE_PYMUPDF", "true").lower() == "true":  # Enabled by default
+        config["processors"]["pymupdf"] = {
+            "extract_images": os.getenv("PYMUPDF_EXTRACT_IMAGES", "true").lower()
+            == "true",
+            "image_dir": os.getenv("PYMUPDF_IMAGE_DIR"),  # None = use temp directory
+        }
+
    # Custom processor (via HTTP API)
    if os.getenv("ENABLE_CUSTOM_PROCESSOR", "false").lower() == "true":
        custom_url = os.getenv("CUSTOM_PROCESSOR_URL")
@@ -180,9 +217,14 @@ class Settings:
    ollama_embedding_model: str = "nomic-embed-text"
    ollama_verify_ssl: bool = True

+    # OpenAI settings (for embeddings)
+    openai_api_key: Optional[str] = None
+    openai_base_url: Optional[str] = None
+    openai_embedding_model: str = "text-embedding-3-small"
+
    # Document chunking settings (for vector embeddings)
-    document_chunk_size: int = 512  # Words per chunk
-    document_chunk_overlap: int = 50  # Overlapping words between chunks
+    document_chunk_size: int = 2048  # Characters per chunk
+    document_chunk_overlap: int = 200  # Overlapping characters between chunks

    # Observability settings
    metrics_enabled: bool = True
@@ -227,10 +269,10 @@ class Settings:
                f"Overlap should be 10-20% of chunk size for optimal results."
            )

-        if self.document_chunk_size < 100:
+        if self.document_chunk_size < 512:
            logger.warning(
-                f"DOCUMENT_CHUNK_SIZE is set to {self.document_chunk_size} words, which is quite small. "
-                f"Smaller chunks may lose context. Consider using at least 256 words."
+                f"DOCUMENT_CHUNK_SIZE is set to {self.document_chunk_size} characters, which is quite small. "
+                f"Smaller chunks may lose context. Consider using at least 1024 characters."
            )

        if self.document_chunk_overlap < 0:
@@ -238,6 +280,29 @@ class Settings:
                f"DOCUMENT_CHUNK_OVERLAP ({self.document_chunk_overlap}) cannot be negative."
            )

+    def get_embedding_model_name(self) -> str:
+        """
+        Pick the embedding model name for the provider that is configured.
+
+        Providers are checked in this order (intended to mirror
+        ProviderRegistry; that class is not visible here, so verify):
+        1. OpenAI - used when ``openai_api_key`` is set
+        2. Ollama - used when ``ollama_base_url`` is set
+        3. Simple - fallback, reported as "simple-384"
+
+        Returns:
+            Active embedding model name
+        """
+        if self.openai_api_key:
+            # OpenAI wins even when Ollama is configured as well.
+            chosen = self.openai_embedding_model
+        elif self.ollama_base_url:
+            chosen = self.ollama_embedding_model
+        else:
+            # No external provider configured.
+            chosen = "simple-384"
+        return chosen
+
    def get_collection_name(self) -> str:
        """
        Get Qdrant collection name.
@@ -253,8 +318,9 @@ class Settings:
        Format: {deployment-id}-{model-name}

        Examples:
-            - "my-deployment-nomic-embed-text" (OTEL_SERVICE_NAME set)
-            - "mcp-container-all-minilm" (hostname fallback)
+            - "my-deployment-nomic-embed-text" (Ollama)
+            - "my-deployment-text-embedding-3-small" (OpenAI)
+            - "mcp-container-openai-text-embedding-3-small" (hostname fallback)

        Returns:
            Collection name string
@@ -274,7 +340,7 @@ class Settings:

        # Sanitize deployment ID and model name
        deployment_id = deployment_id.lower().replace(" ", "-").replace("_", "-")
-        model_name = self.ollama_embedding_model.replace("/", "-").replace(":", "-")
+        model_name = self.get_embedding_model_name().replace("/", "-").replace(":", "-")

        return f"{deployment_id}-{model_name}"

@@ -288,8 +354,8 @@ def get_settings() -> Settings:
    return Settings(
        # OAuth/OIDC settings
        oidc_discovery_url=os.getenv("OIDC_DISCOVERY_URL"),
-        oidc_client_id=os.getenv("OIDC_CLIENT_ID"),
-        oidc_client_secret=os.getenv("OIDC_CLIENT_SECRET"),
+        oidc_client_id=os.getenv("NEXTCLOUD_OIDC_CLIENT_ID"),
+        oidc_client_secret=os.getenv("NEXTCLOUD_OIDC_CLIENT_SECRET"),
        oidc_issuer=os.getenv("OIDC_ISSUER"),
        # Nextcloud settings
        nextcloud_host=os.getenv("NEXTCLOUD_HOST"),
@@ -334,9 +400,15 @@ def get_settings() -> Settings:
        ollama_base_url=os.getenv("OLLAMA_BASE_URL"),
        ollama_embedding_model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
        ollama_verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
+        # OpenAI settings
+        openai_api_key=os.getenv("OPENAI_API_KEY"),
+        openai_base_url=os.getenv("OPENAI_BASE_URL"),
+        openai_embedding_model=os.getenv(
+            "OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
+        ),
        # Document chunking settings
-        document_chunk_size=int(os.getenv("DOCUMENT_CHUNK_SIZE", "512")),
-        document_chunk_overlap=int(os.getenv("DOCUMENT_CHUNK_OVERLAP", "50")),
+        document_chunk_size=int(os.getenv("DOCUMENT_CHUNK_SIZE", "2048")),
+        document_chunk_overlap=int(os.getenv("DOCUMENT_CHUNK_OVERLAP", "200")),
        # Observability settings
        metrics_enabled=os.getenv("METRICS_ENABLED", "true").lower() == "true",
        metrics_port=int(os.getenv("METRICS_PORT", "9090")),
@@ -1,21 +1,37 @@
 """Helper functions for accessing context in MCP tools."""

+import logging
+
+from httpx import BasicAuth
 from mcp.server.fastmcp import Context

 from nextcloud_mcp_server.client import NextcloudClient
-from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.config import (
+    DeploymentMode,
+    get_deployment_mode,
+    get_settings,
+)
+
+logger = logging.getLogger(__name__)


 async def get_client(ctx: Context) -> NextcloudClient:
    """
    Get the appropriate Nextcloud client based on authentication mode.

-    ADR-005 compliant implementation supporting two modes:
-    1. BasicAuth mode: Returns shared client from lifespan context
-    2. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
-       Token already contains both MCP and Nextcloud audiences - use directly
-    3. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
-       Exchange MCP token for Nextcloud token via RFC 8693
+    ADR-016 compliant implementation supporting three deployment modes:
+
+    1. Smithery stateless mode (SMITHERY_DEPLOYMENT=true):
+       Create client from session configuration (nextcloud_url, username, app_password)
+       No persistent state - client created per-request from Smithery session config.
+
+    2. BasicAuth mode: Returns shared client from lifespan context
+
+    3. OAuth mode:
+       a. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
+          Token already contains both MCP and Nextcloud audiences - use directly
+       b. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
+          Exchange MCP token for Nextcloud token via RFC 8693

    SECURITY: Token passthrough has been REMOVED. All OAuth modes validate
    proper token audiences per MCP Security Best Practices specification.
@@ -24,7 +40,7 @@ async def get_client(ctx: Context) -> NextcloudClient:
    by the MCP server via @require_scopes decorator, not by the IdP.

    This function automatically detects the authentication mode by checking
-    the type of the lifespan context.
+    the deployment mode and type of the lifespan context.

    Args:
        ctx: MCP request context
@@ -34,6 +50,7 @@ async def get_client(ctx: Context) -> NextcloudClient:

    Raises:
        AttributeError: If context doesn't contain expected data
+        ValueError: If Smithery mode but session config is missing required fields

    Example:
        ```python
@@ -43,6 +60,12 @@ async def get_client(ctx: Context) -> NextcloudClient:
            return await client.capabilities()
        ```
    """
+    deployment_mode = get_deployment_mode()
+
+    # ADR-016: Smithery stateless mode - create client from session config
+    if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
+        return _get_client_from_session_config(ctx)
+
    settings = get_settings()
    lifespan_ctx = ctx.request_context.lifespan_context

@@ -75,3 +98,82 @@ async def get_client(ctx: Context) -> NextcloudClient:
        f"Lifespan context does not have 'client' or 'nextcloud_host' attribute. "
        f"Type: {type(lifespan_ctx)}"
    )
+
+
+def _get_client_from_session_config(ctx: Context) -> NextcloudClient:
+    """
+    Build a NextcloudClient from the Smithery session configuration.
+
+    ADR-016: in Smithery stateless mode the credentials travel with each
+    request, so a new client is built on every call and nothing is cached.
+    The configuration is read through ``get_smithery_session_config()``; per
+    the original notes it is populated from URL query parameters by
+    SmitheryConfigMiddleware.
+
+    Required session config fields:
+    - nextcloud_url: Nextcloud instance URL, must start with http:// or https://
+    - username: Nextcloud username
+    - app_password: Nextcloud app password
+
+    Args:
+        ctx: MCP request context (not read by this function)
+
+    Returns:
+        NextcloudClient authenticated with BasicAuth(username, app_password)
+
+    Raises:
+        ValueError: If there is no session config, a required field is
+            missing or empty, or nextcloud_url has no http(s) scheme
+    """
+    # Imported inside the function as in the original; presumably to avoid a
+    # circular import with nextcloud_mcp_server.app - confirm before hoisting.
+    from nextcloud_mcp_server.app import get_smithery_session_config
+
+    config = get_smithery_session_config()
+    if config is None:
+        raise ValueError(
+            "Session configuration required in Smithery mode. "
+            "Ensure nextcloud_url, username, and app_password are provided as URL query parameters."
+        )
+
+    # Read the fields in a fixed order so the error message lists them the
+    # same way every time.
+    required = ("nextcloud_url", "username", "app_password")
+    values = {field: config.get(field) for field in required}
+    absent = [field for field in required if not values[field]]
+    if absent:
+        raise ValueError(
+            f"Missing required session config fields: {', '.join(absent)}. "
+            f"Configure these in the Smithery connection settings."
+        )
+
+    nextcloud_url = values["nextcloud_url"]
+    username = values["username"]
+    app_password = values["app_password"]
+
+    # Narrowing for the type checker only; the check above already rejected
+    # None and empty values.
+    assert nextcloud_url is not None
+    assert username is not None
+    assert app_password is not None
+
+    has_http_scheme = nextcloud_url.startswith(("http://", "https://"))
+    if not has_http_scheme:
+        raise ValueError(
+            f"Invalid nextcloud_url: {nextcloud_url}. "
+            f"Must start with http:// or https://"
+        )
+
+    logger.debug(f"Creating Smithery client for {nextcloud_url} as {username}")
+
+    credentials = BasicAuth(username, app_password)
+    return NextcloudClient(
+        base_url=nextcloud_url,
+        username=username,
+        auth=credentials,
+    )
@@ -1,12 +1,18 @@
 """Document processing plugins for extracting text from various file formats."""

 from .base import DocumentProcessor, ProcessingResult, ProcessorError
+from .pymupdf import PyMuPDFProcessor
 from .registry import ProcessorRegistry, get_registry

+# Register processors at module initialization
+_registry = get_registry()
+_registry.register(PyMuPDFProcessor(), priority=10)
+
 __all__ = [
    "DocumentProcessor",
    "ProcessingResult",
    "ProcessorError",
    "ProcessorRegistry",
    "get_registry",
+    "PyMuPDFProcessor",
 ]
@@ -0,0 +1,253 @@
+"""Document processor using PyMuPDF (fitz) library."""
+
+import logging
+import pathlib
+import tempfile
+from collections.abc import Awaitable, Callable
+from typing import Any, Optional
+
+# NOTE: Do NOT call pymupdf.layout.activate() here!
+# It changes the behavior of pymupdf4llm.to_markdown() when page_chunks=True,
+# causing it to return a string instead of a list[dict].
+# See: https://github.com/pymupdf/pymupdf4llm/issues/323
+import pymupdf
+import pymupdf4llm
+
+from .base import DocumentProcessor, ProcessingResult, ProcessorError
+
+logger = logging.getLogger(__name__)
+
+
+class PyMuPDFProcessor(DocumentProcessor):
+    """Document processor using PyMuPDF library for PDF processing.
+
+    PyMuPDF (fitz) is a fast, local PDF processing library that extracts text,
+    metadata, and images without requiring external API calls.
+
+    Features:
+    - Fast text extraction with layout preservation
+    - PDF metadata extraction (title, author, creation date, page count)
+    - Image extraction for future multimodal support
+    - Page number tracking for precise citations
+    """
+
+    SUPPORTED_TYPES = {
+        "application/pdf",
+    }
+
+    # PDF metadata key -> key used in the returned metadata dictionary.
+    # Insertion order determines the order of keys in the result.
+    _METADATA_FIELDS = {
+        "title": "title",
+        "author": "author",
+        "subject": "subject",
+        "keywords": "keywords",
+        "creator": "creator",
+        "producer": "producer",
+        "creationDate": "creation_date",
+        "modDate": "modification_date",
+    }
+
+    def __init__(
+        self,
+        extract_images: bool = True,
+        image_dir: Optional[str | pathlib.Path] = None,
+    ):
+        """Initialize PyMuPDF processor.
+
+        Args:
+            extract_images: Whether to extract embedded images from PDFs
+            image_dir: Directory to store extracted images (defaults to
+                ``<system temp dir>/pdf-images``)
+        """
+        self.extract_images = extract_images
+
+        if image_dir is None:
+            self.image_dir = pathlib.Path(tempfile.gettempdir()) / "pdf-images"
+        else:
+            self.image_dir = pathlib.Path(image_dir)
+
+        # Create image directory if it doesn't exist
+        if self.extract_images:
+            self.image_dir.mkdir(exist_ok=True, parents=True)
+            logger.info(
+                f"Initialized PyMuPDFProcessor with image extraction to {self.image_dir}"
+            )
+        else:
+            logger.info("Initialized PyMuPDFProcessor without image extraction")
+
+    @property
+    def name(self) -> str:
+        """Identifier of this processor ("pymupdf")."""
+        return "pymupdf"
+
+    @property
+    def supported_mime_types(self) -> set[str]:
+        """MIME types this processor accepts."""
+        return self.SUPPORTED_TYPES
+
+    @staticmethod
+    def _image_subdir_name(filename: Optional[str]) -> str:
+        """Turn a filename into a single, safe directory name for its images."""
+        if not filename:
+            return "unknown"
+        # Flatten both separator styles so the name stays one path component.
+        safe = filename.replace("/", "_").replace("\\", "_")
+        # "." and ".." would resolve to the image root or its parent.
+        return "unknown" if safe in {".", ".."} else safe
+
+    async def process(
+        self,
+        content: bytes,
+        content_type: str,
+        filename: Optional[str] = None,
+        options: Optional[dict[str, Any]] = None,
+        progress_callback: Optional[
+            Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
+        ] = None,
+    ) -> ProcessingResult:
+        """Process a PDF document and extract text, metadata, and images.
+
+        Args:
+            content: PDF document bytes
+            content_type: MIME type (should be application/pdf)
+            filename: Optional filename for better error messages; also used
+                to name the per-document image directory
+            options: Processing options (currently unused)
+            progress_callback: Optional callback for progress updates
+
+        Returns:
+            ProcessingResult with extracted text and metadata. The metadata
+            includes ``page_boundaries`` (character offsets of each page in
+            the returned text) and, when images were written, ``image_count``
+            and ``image_paths``.
+
+        Raises:
+            ProcessorError: If PDF processing fails
+        """
+        import anyio
+
+        doc = None
+        try:
+            if progress_callback:
+                await progress_callback(0, 100, "Opening PDF document")
+
+            # Open the document in a worker thread to keep the event loop free
+            doc = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+                lambda: pymupdf.open("pdf", content)
+            )
+
+            metadata = self._extract_metadata(doc, filename)
+            metadata["file_size"] = len(content)
+            page_count = doc.page_count
+
+            if progress_callback:
+                await progress_callback(10, 100, f"Extracting {page_count} pages")
+
+            # Prepare image directory if needed
+            pdf_image_dir = None
+            if self.extract_images:
+                pdf_image_dir = self.image_dir / self._image_subdir_name(filename)
+                pdf_image_dir.mkdir(exist_ok=True, parents=True)
+
+            # Extract all pages in a single call with page_chunks=True
+            def do_extract() -> list[dict[str, Any]]:
+                # When page_chunks=True, to_markdown returns list[dict] not str
+                return pymupdf4llm.to_markdown(  # type: ignore[return-value]
+                    doc,
+                    write_images=self.extract_images,
+                    image_path=pdf_image_dir if self.extract_images else None,
+                    page_chunks=True,
+                )
+
+            page_chunks: list[dict[str, Any]] = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+                do_extract
+            )
+
+            if progress_callback:
+                await progress_callback(90, 100, "Building result")
+
+            # Extract page texts and build boundaries from chunks
+            page_texts: list[str] = []
+            page_boundaries: list[dict[str, Any]] = []
+            current_offset = 0
+            for chunk in page_chunks:
+                text = chunk.get("text", "")
+                # Fall back to a 1-based running index when the chunk has no
+                # page number of its own.
+                page_num = chunk.get("metadata", {}).get("page", len(page_texts) + 1)
+                page_texts.append(text)
+                page_boundaries.append(
+                    {
+                        "page": page_num,
+                        "start_offset": current_offset,
+                        "end_offset": current_offset + len(text),
+                    }
+                )
+                current_offset += len(text)
+
+            # Collect image paths (everything currently in the image directory)
+            image_paths = []
+            if pdf_image_dir and pdf_image_dir.exists():
+                image_paths = [str(p) for p in pdf_image_dir.glob("*")]
+
+            # Build final text and metadata; pages are joined without a
+            # separator so the offsets in page_boundaries index into md_text.
+            md_text = "".join(page_texts)
+            metadata["has_images"] = len(image_paths) > 0
+            if image_paths:
+                metadata["image_count"] = len(image_paths)
+                metadata["image_paths"] = image_paths
+            metadata["page_boundaries"] = page_boundaries
+
+            if progress_callback:
+                await progress_callback(100, 100, "Processing complete")
+
+            logger.info(
+                f"Successfully processed PDF {filename or '<bytes>'}: "
+                f"{metadata['page_count']} pages, {len(md_text)} chars, "
+                f"{metadata.get('image_count', 0)} images"
+            )
+
+            return ProcessingResult(
+                text=md_text,
+                metadata=metadata,
+                processor=self.name,
+                success=True,
+            )
+
+        except Exception as e:
+            error_msg = f"Failed to process PDF {filename or '<bytes>'}: {e}"
+            logger.error(error_msg, exc_info=True)
+            raise ProcessorError(error_msg) from e
+        finally:
+            # Release the document on every path, including failures; it was
+            # previously closed only after a successful extraction.
+            if doc is not None:
+                doc.close()
+
+    def _extract_metadata(
+        self, doc: pymupdf.Document, filename: Optional[str]
+    ) -> dict[str, Any]:
+        """Extract metadata from PDF document.
+
+        Args:
+            doc: Opened PyMuPDF document
+            filename: Optional filename
+
+        Returns:
+            Dictionary with ``page_count`` and ``format``, ``filename`` when
+            given, and every non-empty standard PDF metadata field.
+        """
+        metadata: dict[str, Any] = {}
+
+        # Basic document info
+        metadata["page_count"] = doc.page_count
+        metadata["format"] = "PDF 1." + str(
+            doc.pdf_version() if hasattr(doc, "pdf_version") else "?"  # type: ignore[call-non-callable]
+        )
+
+        if filename:
+            metadata["filename"] = filename
+
+        # Copy the standard PDF metadata fields that have a non-empty value
+        pdf_metadata = doc.metadata
+        if pdf_metadata:
+            for source_key, target_key in self._METADATA_FIELDS.items():
+                if pdf_metadata.get(source_key):
+                    metadata[target_key] = pdf_metadata[source_key]
+
+        return metadata
+
+    async def health_check(self) -> bool:
+        """Check if PyMuPDF is available and working.
+
+        Returns:
+            True if an empty in-memory document can be opened and closed,
+            False (with the error logged) otherwise
+        """
+        try:
+            # Try to create a simple PDF in memory
+            test_doc = pymupdf.open()
+            test_doc.close()
+            return True
+        except Exception as e:
+            logger.error(f"PyMuPDF health check failed: {e}")
+            return False
@@ -1,6 +1,13 @@
 """Embedding service package for generating vector embeddings."""

-from .service import EmbeddingService, get_embedding_service
+from .bm25_provider import BM25SparseEmbeddingProvider
+from .service import EmbeddingService, get_bm25_service, get_embedding_service
 from .simple_provider import SimpleEmbeddingProvider

-__all__ = ["EmbeddingService", "get_embedding_service", "SimpleEmbeddingProvider"]
+__all__ = [
+    "EmbeddingService",
+    "get_embedding_service",
+    "BM25SparseEmbeddingProvider",
+    "get_bm25_service",
+    "SimpleEmbeddingProvider",
+]
@@ -0,0 +1,98 @@
+"""BM25 sparse embedding provider using FastEmbed."""
+
+import logging
+from typing import Any
+
+from fastembed import SparseTextEmbedding
+
+logger = logging.getLogger(__name__)
+
+
+class BM25SparseEmbeddingProvider:
+    """
+    Sparse (BM25) embedding provider backed by FastEmbed.
+
+    Produces keyword-oriented sparse vectors intended for hybrid search,
+    where they are combined with dense semantic vectors in Qdrant using
+    Reciprocal Rank Fusion (RRF).
+
+    Dense embeddings have a fixed dimension; a sparse embedding instead is a
+    variable-length collection of (index, value) pairs. Every method here
+    returns that collection as a dict with parallel 'indices' and 'values'
+    lists.
+    """
+
+    def __init__(self, model_name: str = "Qdrant/bm25"):
+        """
+        Load the FastEmbed sparse model.
+
+        Args:
+            model_name: FastEmbed BM25 model name (default: Qdrant/bm25)
+        """
+        logger.info(f"Initializing BM25 sparse embedding provider: {model_name}")
+        self.model_name = model_name
+
+        # The model object is created once and reused by every encode call
+        self.model = SparseTextEmbedding(model_name=model_name)
+        logger.info(f"BM25 sparse embedding model loaded: {model_name}")
+
+    @staticmethod
+    def _as_sparse_dict(embedding: Any) -> dict[str, Any]:
+        """Convert one FastEmbed sparse embedding into plain Python lists."""
+        return {
+            "indices": embedding.indices.tolist(),
+            "values": embedding.values.tolist(),
+        }
+
+    def encode(self, text: str) -> dict[str, Any]:
+        """
+        Encode one text into a BM25 sparse vector (blocking call).
+
+        Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
+
+        Args:
+            text: Input text to encode
+
+        Returns:
+            Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
+        """
+        # embed() hands back an iterable of results; only the first is needed
+        results = iter(self.model.embed([text]))
+        first = next(results)
+        return self._as_sparse_dict(first)
+
+    async def encode_async(self, text: str) -> dict[str, Any]:
+        """
+        Encode one text into a BM25 sparse vector without blocking the loop.
+
+        Delegates to encode() on a worker thread, since the encoding itself
+        is CPU-bound.
+
+        Args:
+            text: Input text to encode
+
+        Returns:
+            Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
+        """
+        import anyio
+
+        # Hand the synchronous encoder to the thread pool
+        return await anyio.to_thread.run_sync(self.encode, text)  # type: ignore[attr-defined]
+
+    async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
+        """
+        Encode several texts into BM25 sparse vectors in one model call.
+
+        Args:
+            texts: List of texts to encode
+
+        Returns:
+            List of dictionaries with 'indices' and 'values' for each text
+        """
+        import anyio
+
+        def _embed_all() -> list[Any]:
+            # Materialize the result iterable while still on the worker thread
+            return list(self.model.embed(texts))
+
+        # CPU-bound work goes to the thread pool so the event loop stays free
+        raw_embeddings = await anyio.to_thread.run_sync(_embed_all)  # type: ignore[attr-defined]
+
+        return [self._as_sparse_dict(item) for item in raw_embeddings]
@@ -1,56 +1,30 @@
-"""Embedding service with provider detection."""
+"""Embedding service with provider detection.
+
+DEPRECATED: This module is maintained for backward compatibility.
+New code should use nextcloud_mcp_server.providers.get_provider() directly.
+"""

 import logging
-import os

-from .base import EmbeddingProvider
-from .ollama_provider import OllamaEmbeddingProvider
-from .simple_provider import SimpleEmbeddingProvider
+from nextcloud_mcp_server.providers import get_provider
+
+from .bm25_provider import BM25SparseEmbeddingProvider

 logger = logging.getLogger(__name__)


 class EmbeddingService:
-    """Unified embedding service with automatic provider detection."""
+    """
+    Unified embedding service with automatic provider detection.
+
+    DEPRECATED: This class wraps the new unified provider infrastructure
+    for backward compatibility. New code should use
+    nextcloud_mcp_server.providers.get_provider() directly.
+    """

    def __init__(self):
        """Initialize embedding service with auto-detected provider."""
-        self.provider = self._detect_provider()
-
-    def _detect_provider(self) -> EmbeddingProvider:
-        """
-        Auto-detect available embedding provider.
-
-        Checks environment variables in order:
-        1. OLLAMA_BASE_URL - Use Ollama provider (production)
-        2. OPENAI_API_KEY - Use OpenAI provider (future)
-        3. Fallback to SimpleEmbeddingProvider (testing/development)
-
-        Returns:
-            Configured embedding provider
-        """
-        # Ollama provider (production)
-        ollama_url = os.getenv("OLLAMA_BASE_URL")
-        if ollama_url:
-            logger.info(f"Using Ollama embedding provider: {ollama_url}")
-            return OllamaEmbeddingProvider(
-                base_url=ollama_url,
-                model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
-                verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
-            )
-
-        # OpenAI provider (future implementation)
-        # openai_key = os.getenv("OPENAI_API_KEY")
-        # if openai_key:
-        #     return OpenAIEmbeddingProvider(api_key=openai_key)
-
-        # Fallback to simple provider for development/testing
-        logger.warning(
-            "No embedding provider configured (OLLAMA_BASE_URL or OPENAI_API_KEY not set). "
-            "Using SimpleEmbeddingProvider for testing/development. "
-            "For production, configure an external embedding service."
-        )
-        return SimpleEmbeddingProvider(dimension=384)
+        self.provider = get_provider()

    async def embed(self, text: str) -> list[float]:
        """
@@ -109,3 +83,20 @@ def get_embedding_service() -> EmbeddingService:
    if _embedding_service is None:
        _embedding_service = EmbeddingService()
    return _embedding_service
+
+
+# BM25 sparse embedding singleton
+_bm25_service: BM25SparseEmbeddingProvider | None = None
+
+
+def get_bm25_service() -> BM25SparseEmbeddingProvider:
+    """
+    Return the shared BM25 sparse embedding provider.
+
+    The provider is constructed on the first call and stored in the
+    module-level ``_bm25_service`` variable; every later call returns that
+    same object.
+
+    Returns:
+        Global BM25SparseEmbeddingProvider instance
+    """
+    global _bm25_service
+    if _bm25_service is not None:
+        return _bm25_service
+
+    # First use: build the provider and remember it for subsequent callers
+    _bm25_service = BM25SparseEmbeddingProvider()
+    return _bm25_service
@@ -10,7 +10,7 @@ from .base import BaseResponse
 class SemanticSearchResult(BaseModel):
    """Model for semantic search results with additional metadata."""

-    id: int = Field(description="Document ID")
+    id: int = Field(description="Document ID (int for all document types)")
    doc_type: str = Field(
        description="Document type (note, calendar_event, deck_card, etc.)"
    )
@@ -19,9 +19,45 @@ class SemanticSearchResult(BaseModel):
        default="", description="Document category (notes) or location (calendar)"
    )
    excerpt: str = Field(description="Excerpt from matching chunk")
-    score: float = Field(description="Semantic similarity score (0-1)")
+    score: float = Field(
+        description=(
+            "Relevance score (≥ 0.0, higher is better). "
+            "Score range depends on fusion method: "
+            "RRF produces scores in [0.0, 1.0], "
+            "DBSF can exceed 1.0 (sum of normalized scores from multiple systems)"
+        )
+    )
    chunk_index: int = Field(description="Index of matching chunk in document")
    total_chunks: int = Field(description="Total number of chunks in document")
+    chunk_start_offset: Optional[int] = Field(
+        default=None, description="Character position where chunk starts in document"
+    )
+    chunk_end_offset: Optional[int] = Field(
+        default=None, description="Character position where chunk ends in document"
+    )
+    page_number: Optional[int] = Field(
+        default=None, description="Page number for PDF documents"
+    )
+    # Context expansion fields (optional, populated when include_context=True)
+    has_context_expansion: bool = Field(
+        default=False, description="Whether context expansion was performed"
+    )
+    marked_text: Optional[str] = Field(
+        default=None,
+        description="Full text with position markers around matched chunk",
+    )
+    before_context: Optional[str] = Field(
+        default=None, description="Text before the matched chunk"
+    )
+    after_context: Optional[str] = Field(
+        default=None, description="Text after the matched chunk"
+    )
+    has_before_truncation: Optional[bool] = Field(
+        default=None, description="Whether before_context was truncated"
+    )
+    has_after_truncation: Optional[bool] = Field(
+        default=None, description="Whether after_context was truncated"
+    )


 class SemanticSearchResponse(BaseResponse):
@@ -37,11 +37,18 @@ class HealthCheckFilter(logging.Filter):
        """
        # Check if the log message contains health check endpoints
        message = record.getMessage()
-        return not any(
+        health_check = any(
            endpoint in message
-            for endpoint in ["/health/live", "/health/ready", "/metrics"]
+            for endpoint in [
+                "/health/live",
+                "/health/ready",
+                "/metrics",
+                "/app/vector-sync/status",
+            ]
        )

+        return not health_check
+

 class TraceContextFormatter(JsonFormatter):
    """
@@ -404,10 +404,11 @@ def update_vector_sync_queue_size(size: int) -> None:

 def instrument_tool(func):
    """
-    Decorator to automatically instrument MCP tool functions with metrics.
+    Decorator to automatically instrument MCP tool functions with metrics and tracing.

-    Wraps async tool functions to record execution time and success/error status.
-    Compatible with @mcp.tool() and @require_scopes() decorators.
+    Wraps async tool functions to record execution time, success/error status, and
+    create OpenTelemetry trace spans. Compatible with @mcp.tool() and @require_scopes()
+    decorators.

    Usage:
        @mcp.tool()
@@ -420,24 +421,46 @@ def instrument_tool(func):
        func: The async function to instrument

    Returns:
-        Wrapped function with metrics instrumentation
+        Wrapped function with metrics and tracing instrumentation
    """
    import functools
    import time

+    from nextcloud_mcp_server.observability.tracing import trace_operation
+
    @functools.wraps(func)
    async def wrapper(*args, **kwargs):
        tool_name = func.__name__
        start_time = time.time()
-        try:
-            result = await func(*args, **kwargs)
-            duration = time.time() - start_time
-            record_tool_call(tool_name, duration, "success")
-            return result
-        except Exception as e:
-            duration = time.time() - start_time
-            record_tool_call(tool_name, duration, "error")
-            record_tool_error(tool_name, type(e).__name__)
-            raise
+
+        # Extract tool arguments for tracing (sanitize sensitive fields)
+        # kwargs contains the actual arguments passed to the tool
+        tool_args = {
+            k: v
+            for k, v in kwargs.items()
+            if k not in ("password", "token", "secret", "api_key", "etag", "ctx")
+        }
+
+        # Create trace span with metrics collection
+        with trace_operation(
+            f"mcp.tool.{tool_name}",
+            attributes={
+                "mcp.tool.name": tool_name,
+                "mcp.tool.args": str(tool_args)[:500]
+                if tool_args
+                else None,  # Limit to 500 chars
+            },
+            record_exception=True,
+        ):
+            try:
+                result = await func(*args, **kwargs)
+                duration = time.time() - start_time
+                record_tool_call(tool_name, duration, "success")
+                return result
+            except Exception as e:
+                duration = time.time() - start_time
+                record_tool_call(tool_name, duration, "error")
+                record_tool_error(tool_name, type(e).__name__)
+                raise

    return wrapper
@@ -66,8 +66,12 @@ class ObservabilityMiddleware(BaseHTTPMiddleware):
        # Record start time
        start_time = time.time()

-        # Skip tracing for health/metrics endpoints to reduce noise
-        should_trace = not (path.startswith("/health/") or path == "/metrics")
+        # Skip tracing for health/metrics/polling endpoints to reduce noise
+        should_trace = not (
+            path.startswith("/health/")
+            or path == "/metrics"
+            or path == "/app/vector-sync/status"
+        )

        try:
            if should_trace:
@@ -0,0 +1,20 @@
+"""Unified provider infrastructure for embeddings and text generation."""
+
+from .anthropic import AnthropicProvider
+from .base import Provider
+from .bedrock import BedrockProvider
+from .ollama import OllamaProvider
+from .openai import OpenAIProvider
+from .registry import get_provider, reset_provider
+from .simple import SimpleProvider
+
+__all__ = [
+    "Provider",
+    "OllamaProvider",
+    "OpenAIProvider",
+    "AnthropicProvider",
+    "SimpleProvider",
+    "BedrockProvider",
+    "get_provider",
+    "reset_provider",
+]
@@ -0,0 +1,97 @@
+"""Unified Anthropic provider for text generation."""
+
+import logging
+
+from anthropic import AsyncAnthropic
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+
+class AnthropicProvider(Provider):
+    """
+    Anthropic provider for text generation.
+
+    Supports Claude models via the Anthropic API.
+    Note: Anthropic doesn't provide embedding models, only text generation,
+    so every embedding-related method raises NotImplementedError.
+    """
+
+    # Single source for the error raised by all embedding entry points
+    _EMBEDDING_UNSUPPORTED = (
+        "Embedding not supported by Anthropic - use Ollama or Bedrock for embeddings"
+    )
+
+    def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"):
+        """
+        Initialize Anthropic provider.
+
+        Args:
+            api_key: Anthropic API key
+            model: Model name (e.g., "claude-3-5-sonnet-20241022")
+        """
+        self.client = AsyncAnthropic(api_key=api_key)
+        self.model = model
+
+        logger.info(f"Initialized Anthropic provider (model={model})")
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation (always False)."""
+        return False
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation (always True)."""
+        return True
+
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        Raises:
+            NotImplementedError: Anthropic doesn't provide embedding models
+        """
+        raise NotImplementedError(self._EMBEDDING_UNSUPPORTED)
+
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts.
+
+        Raises:
+            NotImplementedError: Anthropic doesn't provide embedding models
+        """
+        raise NotImplementedError(self._EMBEDDING_UNSUPPORTED)
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Raises:
+            NotImplementedError: Anthropic doesn't provide embedding models
+        """
+        raise NotImplementedError(self._EMBEDDING_UNSUPPORTED)
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text using Anthropic API.
+
+        The prompt is sent as a single user message with a fixed temperature
+        of 0.7.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text: the text of every text-bearing content block in
+            the response, concatenated in order. Empty string if the
+            response contains no text.
+        """
+        message = await self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            temperature=0.7,
+            messages=[{"role": "user", "content": prompt}],
+        )
+
+        # The response body is a list of content blocks. Indexing [0].text
+        # blindly fails on an empty list or when the first block carries no
+        # text, so collect the text of whichever blocks have it instead.
+        candidate_texts = (getattr(block, "text", None) for block in message.content)
+        return "".join(text for text in candidate_texts if isinstance(text, str))
+
+    async def close(self) -> None:
+        """Close the SDK client so its underlying HTTP connections are released."""
+        await self.client.close()
@@ -0,0 +1,91 @@
+"""Unified provider interface for embeddings and text generation."""
+
+from abc import ABC, abstractmethod
+
+
+class Provider(ABC):
+    """
+    Abstract interface shared by all LLM providers.
+
+    A concrete provider may offer embeddings, text generation, or both.
+    Callers should consult ``supports_embeddings`` / ``supports_generation``
+    before using the corresponding methods. Every member declared here is
+    abstract, so subclasses must implement all of them.
+    """
+
+    @property
+    @abstractmethod
+    def supports_embeddings(self) -> bool:
+        """True when this provider can generate embeddings."""
+
+    @property
+    @abstractmethod
+    def supports_generation(self) -> bool:
+        """True when this provider can generate text."""
+
+    @abstractmethod
+    async def embed(self, text: str) -> list[float]:
+        """
+        Embed a single text.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If provider doesn't support embeddings
+        """
+
+    @abstractmethod
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """
+        Embed several texts in one call.
+
+        Args:
+            texts: List of texts to embed
+
+        Returns:
+            List of vector embeddings
+
+        Raises:
+            NotImplementedError: If provider doesn't support embeddings
+        """
+
+    @abstractmethod
+    def get_dimension(self) -> int:
+        """
+        Report the length of the vectors this provider produces.
+
+        Returns:
+            Vector dimension (e.g., 768 for nomic-embed-text)
+
+        Raises:
+            NotImplementedError: If provider doesn't support embeddings
+        """
+
+    @abstractmethod
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Produce text in response to a prompt.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text
+
+        Raises:
+            NotImplementedError: If provider doesn't support generation
+        """
+
+    @abstractmethod
+    async def close(self) -> None:
+        """Shut the provider down and release any resources it holds."""
@@ -0,0 +1,397 @@
+"""Amazon Bedrock provider for embeddings and text generation."""
+
+import json
+import logging
+from typing import Any
+
+try:
+    import boto3
+    from botocore.exceptions import BotoCoreError, ClientError
+
+    BOTO3_AVAILABLE = True
+except ImportError:
+    BOTO3_AVAILABLE = False
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+
+class BedrockProvider(Provider):
+    """
+    Amazon Bedrock provider supporting both embeddings and text generation.
+
+    Uses AWS Bedrock Runtime API with boto3. Supports various model families:
+    - Embeddings: amazon.titan-embed-text-v1, amazon.titan-embed-text-v2, cohere.embed-*
+    - Text Generation: anthropic.claude-*, meta.llama3-*, amazon.titan-text-*, mistral.*, etc.
+
+    Request and response payloads differ per model family; the family is
+    selected by prefix-matching the configured model ID.
+
+    Requires AWS credentials configured via:
+    - Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION)
+    - AWS credentials file (~/.aws/credentials)
+    - IAM role (when running on AWS)
+    """
+
+    def __init__(
+        self,
+        region_name: str | None = None,
+        embedding_model: str | None = None,
+        generation_model: str | None = None,
+        aws_access_key_id: str | None = None,
+        aws_secret_access_key: str | None = None,
+    ):
+        """
+        Initialize Bedrock provider.
+
+        Args:
+            region_name: AWS region (e.g., "us-east-1"). Defaults to AWS_REGION env var.
+            embedding_model: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0").
+                None disables embeddings.
+            generation_model: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0").
+                None disables generation.
+            aws_access_key_id: AWS access key (optional, uses default credential chain if not provided)
+            aws_secret_access_key: AWS secret key (optional, uses default credential chain if not provided)
+
+        Raises:
+            ImportError: If boto3 is not installed
+        """
+        if not BOTO3_AVAILABLE:
+            raise ImportError(
+                "boto3 is required for Bedrock provider. Install with: pip install boto3"
+            )
+
+        self.embedding_model = embedding_model
+        self.generation_model = generation_model
+        self._dimension: int | None = None  # Detected dynamically
+
+        # Only pass explicitly provided settings so boto3's own defaults and
+        # credential chain apply to everything else
+        client_kwargs: dict[str, Any] = {}
+        if region_name:
+            client_kwargs["region_name"] = region_name
+        if aws_access_key_id:
+            client_kwargs["aws_access_key_id"] = aws_access_key_id
+        if aws_secret_access_key:
+            client_kwargs["aws_secret_access_key"] = aws_secret_access_key
+
+        self.client = boto3.client("bedrock-runtime", **client_kwargs)
+
+        logger.info(
+            f"Initialized Bedrock provider in region {region_name or 'default'} "
+            f"(embedding_model={embedding_model}, generation_model={generation_model})"
+        )
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return self.embedding_model is not None
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return self.generation_model is not None
+
+    async def _invoke_model(
+        self, model_id: str, request_body: dict[str, Any]
+    ) -> dict[str, Any]:
+        """
+        Call Bedrock ``invoke_model`` and return the decoded JSON response body.
+
+        boto3 clients are synchronous, so the HTTP round trip and the body
+        read run on a worker thread to keep the event loop responsive.
+
+        Args:
+            model_id: Bedrock model ID to invoke
+            request_body: JSON-serializable request payload
+
+        Returns:
+            Parsed JSON response body
+        """
+        import asyncio
+
+        def _call() -> dict[str, Any]:
+            response = self.client.invoke_model(
+                modelId=model_id,
+                body=json.dumps(request_body),
+                accept="application/json",
+                contentType="application/json",
+            )
+            # The body is a stream; read and decode it on the worker thread too
+            return json.loads(response["body"].read())
+
+        return await asyncio.to_thread(_call)
+
+    def _create_embedding_request(self, text: str) -> dict[str, Any]:
+        """
+        Create model-specific embedding request payload.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Request payload dict for the embedding model
+
+        Raises:
+            NotImplementedError: If no embedding_model is configured
+        """
+        if not self.embedding_model:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        # Titan Embed models
+        if self.embedding_model.startswith("amazon.titan-embed"):
+            return {"inputText": text}
+
+        # Cohere Embed models
+        elif self.embedding_model.startswith("cohere.embed"):
+            return {"texts": [text], "input_type": "search_document"}
+
+        # Unknown model - try Titan format as default
+        else:
+            logger.warning(
+                f"Unknown embedding model format for {self.embedding_model}, "
+                "using Titan format as default"
+            )
+            return {"inputText": text}
+
+    def _parse_embedding_response(self, response: dict[str, Any]) -> list[float]:
+        """
+        Parse model-specific embedding response.
+
+        Args:
+            response: Raw response from Bedrock
+
+        Returns:
+            Embedding vector as list of floats
+
+        Raises:
+            ValueError: If the model family is unknown and the response has
+                neither an "embedding" nor a non-empty "embeddings" field
+        """
+        # Titan Embed models
+        if self.embedding_model and self.embedding_model.startswith(
+            "amazon.titan-embed"
+        ):
+            return response["embedding"]
+
+        # Cohere Embed models
+        elif self.embedding_model and self.embedding_model.startswith("cohere.embed"):
+            return response["embeddings"][0]
+
+        # Unknown model - try Titan format first, then the Cohere-style list
+        else:
+            logger.warning(
+                f"Unknown embedding response format for {self.embedding_model}, "
+                "trying Titan format"
+            )
+            if "embedding" in response:
+                return response["embedding"]
+
+            # Look at "embeddings" only when "embedding" is absent, and only
+            # index into it when it is a non-empty sequence; anything else is
+            # reported explicitly instead of returning None or failing with
+            # an unrelated IndexError/KeyError.
+            candidates = response.get("embeddings")
+            if isinstance(candidates, (list, tuple)) and candidates:
+                return candidates[0]
+
+            raise ValueError(
+                f"No embedding found in Bedrock response for model {self.embedding_model}"
+            )
+
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        The first successful call also records the vector length, so
+        get_dimension() works afterwards without a separate detection step.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            ClientError: If Bedrock API call fails
+        """
+        model_id = self.embedding_model
+        if not self.supports_embeddings or model_id is None:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        try:
+            request_body = self._create_embedding_request(text)
+            response_body = await self._invoke_model(model_id, request_body)
+            embedding = self._parse_embedding_response(response_body)
+        except (BotoCoreError, ClientError) as e:
+            logger.error(f"Bedrock embedding error: {e}")
+            raise
+
+        # Remember the dimension the first time a vector is seen
+        if self._dimension is None:
+            self._dimension = len(embedding)
+
+        return embedding
+
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts.
+
+        Note: Current implementation sends requests sequentially.
+        Future optimization could use asyncio for concurrent requests.
+
+        Args:
+            texts: List of texts to embed
+
+        Returns:
+            List of vector embeddings, in the same order as ``texts``
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            ClientError: If Bedrock API call fails
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        return [await self.embed(text) for text in texts]
+
+    async def _detect_dimension(self) -> None:
+        """
+        Detect embedding dimension by generating a test embedding.
+
+        Does nothing when the dimension is already known or when embeddings
+        are disabled.
+        """
+        if self._dimension is None and self.supports_embeddings:
+            logger.debug(
+                f"Detecting embedding dimension for model {self.embedding_model}..."
+            )
+            test_embedding = await self.embed("test")
+            self._dimension = len(test_embedding)
+            logger.info(
+                f"Detected embedding dimension: {self._dimension} "
+                f"for model {self.embedding_model}"
+            )
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Returns:
+            Vector dimension for the configured embedding model
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            RuntimeError: If dimension not detected yet (call _detect_dimension
+                or embed first)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.embedding_model}. "
+                "Call _detect_dimension() first or generate an embedding."
+            )
+        return self._dimension
+
+    def _create_generation_request(
+        self, prompt: str, max_tokens: int
+    ) -> dict[str, Any]:
+        """
+        Create model-specific text generation request payload.
+
+        Every payload uses a fixed temperature of 0.7.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Request payload dict for the generation model
+
+        Raises:
+            NotImplementedError: If no generation_model is configured
+        """
+        if not self.generation_model:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        # Anthropic Claude models
+        if self.generation_model.startswith("anthropic.claude"):
+            return {
+                "anthropic_version": "bedrock-2023-05-31",
+                "max_tokens": max_tokens,
+                "temperature": 0.7,
+                "messages": [{"role": "user", "content": prompt}],
+            }
+
+        # Meta Llama models
+        elif self.generation_model.startswith("meta.llama"):
+            return {"prompt": prompt, "max_gen_len": max_tokens, "temperature": 0.7}
+
+        # Amazon Titan Text models
+        elif self.generation_model.startswith("amazon.titan-text"):
+            return {
+                "inputText": prompt,
+                "textGenerationConfig": {
+                    "maxTokenCount": max_tokens,
+                    "temperature": 0.7,
+                },
+            }
+
+        # Mistral models
+        elif self.generation_model.startswith("mistral"):
+            return {"prompt": prompt, "max_tokens": max_tokens, "temperature": 0.7}
+
+        # Unknown model - try Claude format as default
+        else:
+            logger.warning(
+                f"Unknown generation model format for {self.generation_model}, "
+                "using Claude format as default"
+            )
+            return {
+                "anthropic_version": "bedrock-2023-05-31",
+                "max_tokens": max_tokens,
+                "temperature": 0.7,
+                "messages": [{"role": "user", "content": prompt}],
+            }
+
+    def _parse_generation_response(self, response: dict[str, Any]) -> str:
+        """
+        Parse model-specific text generation response.
+
+        Args:
+            response: Raw response from Bedrock
+
+        Returns:
+            Generated text. For an unknown model family, the first of the
+            fields "text", "generation", "outputText", "completion" that is
+            present; if none is, the whole response serialized as JSON.
+        """
+        # Anthropic Claude models
+        if self.generation_model and self.generation_model.startswith(
+            "anthropic.claude"
+        ):
+            return response["content"][0]["text"]
+
+        # Meta Llama models
+        elif self.generation_model and self.generation_model.startswith("meta.llama"):
+            return response["generation"]
+
+        # Amazon Titan Text models
+        elif self.generation_model and self.generation_model.startswith(
+            "amazon.titan-text"
+        ):
+            return response["results"][0]["outputText"]
+
+        # Mistral models
+        elif self.generation_model and self.generation_model.startswith("mistral"):
+            return response["outputs"][0]["text"]
+
+        # Unknown model - try common response fields
+        else:
+            logger.warning(
+                f"Unknown generation response format for {self.generation_model}, "
+                "trying common fields"
+            )
+            # Try common response field names
+            for field in ["text", "generation", "outputText", "completion"]:
+                if field in response:
+                    return response[field]
+            # Last resort: return JSON string
+            return json.dumps(response)
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text
+
+        Raises:
+            NotImplementedError: If generation not enabled (no generation_model)
+            ClientError: If Bedrock API call fails
+        """
+        model_id = self.generation_model
+        if not self.supports_generation or model_id is None:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        try:
+            request_body = self._create_generation_request(prompt, max_tokens)
+            response_body = await self._invoke_model(model_id, request_body)
+            return self._parse_generation_response(response_body)
+        except (BotoCoreError, ClientError) as e:
+            logger.error(f"Bedrock generation error: {e}")
+            raise
+
+    async def close(self) -> None:
+        """Close the client (no-op for boto3 clients)."""
+        pass
@@ -0,0 +1,234 @@
+"""Unified Ollama provider for embeddings and text generation."""
+
+import logging
+
+import httpx
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+
+class OllamaProvider(Provider):
+    """
+    Ollama provider supporting both embeddings and text generation.
+
+    Supports TLS, SSL verification, and automatic model loading.
+    """
+
+    def __init__(
+        self,
+        base_url: str,
+        embedding_model: str | None = None,
+        generation_model: str | None = None,
+        verify_ssl: bool = True,
+        timeout: httpx.Timeout | None = None,
+    ):
+        """
+        Initialize Ollama provider.
+
+        Args:
+            base_url: Ollama API base URL (e.g., https://ollama.internal.example.com:443)
+            embedding_model: Model for embeddings (e.g., "nomic-embed-text"). None disables embeddings.
+            generation_model: Model for text generation (e.g., "llama3.2:1b"). None disables generation.
+            verify_ssl: Verify SSL certificates (default: True)
+            timeout: HTTP timeout configuration
+        """
+        self.base_url = base_url.rstrip("/")
+        self.embedding_model = embedding_model
+        self.generation_model = generation_model
+        self.verify_ssl = verify_ssl
+
+        if timeout is None:
+            timeout = httpx.Timeout(timeout=120, connect=5)
+
+        self.client = httpx.AsyncClient(verify=verify_ssl, timeout=timeout)
+        self._dimension: int | None = None  # Detected dynamically for embeddings
+
+        logger.info(
+            f"Initialized Ollama provider: {base_url} "
+            f"(embedding_model={embedding_model}, generation_model={generation_model}, "
+            f"verify_ssl={verify_ssl})"
+        )
+
+        # Pre-check and auto-load models
+        if embedding_model:
+            self._check_model_is_loaded(embedding_model, autoload=True)
+        if generation_model:
+            self._check_model_is_loaded(generation_model, autoload=True)
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return self.embedding_model is not None
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return self.generation_model is not None
+
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        response = await self.client.post(
+            f"{self.base_url}/api/embeddings",
+            json={"model": self.embedding_model, "prompt": text},
+        )
+        response.raise_for_status()
+        return response.json()["embedding"]
+
+    async def embed_batch(
+        self, texts: list[str], batch_size: int = 32
+    ) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts using Ollama's batch API.
+
+        Uses /api/embed endpoint with array input for efficient batch processing.
+        Conservative batch size (32) prevents quality degradation observed in
+        Ollama issue #6262 with larger batches.
+
+        Note: Ollama processes batches serially, not in parallel.
+
+        Args:
+            texts: List of texts to embed
+            batch_size: Maximum texts per batch (default: 32)
+
+        Returns:
+            List of vector embeddings
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        all_embeddings = []
+        for i in range(0, len(texts), batch_size):
+            batch = texts[i : i + batch_size]
+            response = await self.client.post(
+                f"{self.base_url}/api/embed",
+                json={"model": self.embedding_model, "input": batch},
+            )
+            response.raise_for_status()
+            all_embeddings.extend(response.json()["embeddings"])
+
+        return all_embeddings
+
+    async def _detect_dimension(self):
+        """
+        Detect embedding dimension by generating a test embedding.
+
+        This method queries the model to determine the actual dimension
+        instead of relying on hardcoded values.
+        """
+        if self._dimension is None and self.supports_embeddings:
+            logger.debug(
+                f"Detecting embedding dimension for model {self.embedding_model}..."
+            )
+            test_embedding = await self.embed("test")
+            self._dimension = len(test_embedding)
+            logger.info(
+                f"Detected embedding dimension: {self._dimension} "
+                f"for model {self.embedding_model}"
+            )
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Returns:
+            Vector dimension for the configured embedding model
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            RuntimeError: If dimension not detected yet (call _detect_dimension first)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.embedding_model}. "
+                "Call _detect_dimension() first or generate an embedding."
+            )
+        return self._dimension
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text
+
+        Raises:
+            NotImplementedError: If generation not enabled (no generation_model)
+        """
+        if not self.supports_generation:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        response = await self.client.post(
+            f"{self.base_url}/api/generate",
+            json={
+                "model": self.generation_model,
+                "prompt": prompt,
+                "stream": False,
+                "options": {
+                    "num_predict": max_tokens,
+                    "temperature": 0.7,
+                },
+            },
+        )
+        response.raise_for_status()
+        data = response.json()
+        return data["response"]
+
+    def _check_model_is_loaded(self, model: str, autoload: bool = True):
+        """
+        Check if model is loaded in Ollama, optionally auto-loading it.
+
+        Args:
+            model: Model name to check
+            autoload: Whether to automatically pull the model if not loaded
+        """
+        response = httpx.get(f"{self.base_url}/api/tags")
+        response.raise_for_status()
+
+        models = [m["name"] for m in response.json().get("models", [])]
+        logger.info("Ollama has following models pre-loaded: %s", models)
+
+        if (model not in models) and autoload:
+            logger.warning(
+                "Model '%s' not yet available in ollama, attempting to pull now...",
+                model,
+            )
+            response = httpx.post(f"{self.base_url}/api/pull", json={"model": model})
+            response.raise_for_status()
+
+    async def close(self) -> None:
+        """Close HTTP client."""
+        await self.client.aclose()
@@ -0,0 +1,227 @@
+"""Unified OpenAI provider for embeddings and text generation.
+
+Supports:
+- OpenAI's standard API
+- GitHub Models API (models.github.ai)
+- Any OpenAI-compatible API via base_url override
+"""
+
+import logging
+
+from openai import AsyncOpenAI
+
+from .base import Provider
+
+logger = logging.getLogger(__name__)
+
+# Well-known embedding dimensions for OpenAI models, keyed by model name.
+# OpenAIProvider looks the configured embedding model up here at construction
+# time so the dimension is known without an API call; models missing from this
+# table get their dimension from the first embedding response instead.
+OPENAI_EMBEDDING_DIMENSIONS: dict[str, int] = {
+    "text-embedding-3-small": 1536,
+    "text-embedding-3-large": 3072,
+    "text-embedding-ada-002": 1536,
+    # GitHub Models API uses openai/ prefix
+    "openai/text-embedding-3-small": 1536,
+    "openai/text-embedding-3-large": 3072,
+}
+
+
+class OpenAIProvider(Provider):
+    """
+    OpenAI provider supporting both embeddings and text generation.
+
+    Works with:
+    - OpenAI's standard API (api.openai.com)
+    - GitHub Models API (models.github.ai)
+    - Any OpenAI-compatible API (via base_url)
+    """
+
+    def __init__(
+        self,
+        api_key: str,
+        base_url: str | None = None,
+        embedding_model: str | None = None,
+        generation_model: str | None = None,
+        timeout: float = 120.0,
+    ):
+        """
+        Initialize OpenAI provider.
+
+        Args:
+            api_key: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
+            base_url: Base URL override (e.g., "https://models.github.ai/inference")
+            embedding_model: Model for embeddings (e.g., "text-embedding-3-small").
+                            None disables embeddings.
+            generation_model: Model for text generation (e.g., "gpt-4o-mini").
+                             None disables generation.
+            timeout: HTTP timeout in seconds (default: 120)
+        """
+        self.embedding_model = embedding_model
+        self.generation_model = generation_model
+        self._dimension: int | None = None
+
+        # Initialize async client
+        self.client = AsyncOpenAI(
+            api_key=api_key,
+            base_url=base_url,
+            timeout=timeout,
+        )
+
+        # Try to get known dimension without API call
+        if embedding_model and embedding_model in OPENAI_EMBEDDING_DIMENSIONS:
+            self._dimension = OPENAI_EMBEDDING_DIMENSIONS[embedding_model]
+
+        logger.info(
+            f"Initialized OpenAI provider: base_url={base_url or 'default'} "
+            f"(embedding_model={embedding_model}, generation_model={generation_model}, "
+            f"dimension={self._dimension})"
+        )
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return self.embedding_model is not None
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return self.generation_model is not None
+
+    async def embed(self, text: str) -> list[float]:
+        """
+        Generate embedding vector for text.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        response = await self.client.embeddings.create(
+            input=text,
+            model=self.embedding_model,
+        )
+
+        embedding = response.data[0].embedding
+
+        # Update dimension if not set
+        if self._dimension is None:
+            self._dimension = len(embedding)
+            logger.info(
+                f"Detected embedding dimension: {self._dimension} "
+                f"for model {self.embedding_model}"
+            )
+
+        return embedding
+
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """
+        Generate embeddings for multiple texts using OpenAI's batch API.
+
+        OpenAI supports up to 2048 inputs per request.
+
+        Args:
+            texts: List of texts to embed
+
+        Returns:
+            List of vector embeddings
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        if not texts:
+            return []
+
+        # OpenAI supports batches up to 2048, but use smaller batches for safety
+        batch_size = 100
+        all_embeddings: list[list[float]] = []
+
+        for i in range(0, len(texts), batch_size):
+            batch = texts[i : i + batch_size]
+
+            response = await self.client.embeddings.create(
+                input=batch,
+                model=self.embedding_model,
+            )
+
+            # Sort by index to maintain order
+            sorted_data = sorted(response.data, key=lambda x: x.index)
+            batch_embeddings = [item.embedding for item in sorted_data]
+            all_embeddings.extend(batch_embeddings)
+
+            # Update dimension if not set
+            if self._dimension is None and batch_embeddings:
+                self._dimension = len(batch_embeddings[0])
+                logger.info(
+                    f"Detected embedding dimension: {self._dimension} "
+                    f"for model {self.embedding_model}"
+                )
+
+        return all_embeddings
+
+    def get_dimension(self) -> int:
+        """
+        Get embedding dimension.
+
+        Returns:
+            Vector dimension for the configured embedding model
+
+        Raises:
+            NotImplementedError: If embeddings not enabled (no embedding_model)
+            RuntimeError: If dimension not detected yet (call embed first)
+        """
+        if not self.supports_embeddings:
+            raise NotImplementedError(
+                "Embedding not supported - no embedding_model configured"
+            )
+
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.embedding_model}. "
+                "Call embed() first or use a known model."
+            )
+        return self._dimension
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        Args:
+            prompt: The prompt to generate from
+            max_tokens: Maximum tokens to generate
+
+        Returns:
+            Generated text
+
+        Raises:
+            NotImplementedError: If generation not enabled (no generation_model)
+        """
+        if not self.supports_generation:
+            raise NotImplementedError(
+                "Text generation not supported - no generation_model configured"
+            )
+
+        response = await self.client.chat.completions.create(
+            model=self.generation_model,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=max_tokens,
+            temperature=0.7,
+        )
+
+        return response.choices[0].message.content or ""
+
+    async def close(self) -> None:
+        """Close HTTP client."""
+        await self.client.close()
@@ -0,0 +1,156 @@
+"""Provider registry and factory for auto-detection and instantiation."""
+
+import logging
+import os
+
+from .base import Provider
+from .bedrock import BedrockProvider
+from .ollama import OllamaProvider
+from .openai import OpenAIProvider
+from .simple import SimpleProvider
+
+logger = logging.getLogger(__name__)
+
+
+class ProviderRegistry:
+    """
+    Registry for provider auto-detection and instantiation.
+
+    Checks environment variables in priority order and creates appropriate provider:
+    1. Bedrock (AWS_REGION + BEDROCK_*_MODEL)
+    2. OpenAI (OPENAI_API_KEY)
+    3. Ollama (OLLAMA_BASE_URL)
+    4. Simple (fallback for testing/development)
+    """
+
+    @staticmethod
+    def create_provider() -> Provider:
+        """
+        Auto-detect and create provider based on environment variables.
+
+        Priority order:
+        1. Bedrock - if AWS_REGION or BEDROCK_EMBEDDING_MODEL is set
+        2. OpenAI - if OPENAI_API_KEY is set
+        3. Ollama - if OLLAMA_BASE_URL is set
+        4. Simple - fallback for testing/development
+
+        Returns:
+            Provider instance
+
+        Environment Variables:
+            Bedrock:
+                - AWS_REGION: AWS region (e.g., "us-east-1")
+                - AWS_ACCESS_KEY_ID: AWS access key (optional, uses credential chain)
+                - AWS_SECRET_ACCESS_KEY: AWS secret key (optional)
+                - BEDROCK_EMBEDDING_MODEL: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
+                - BEDROCK_GENERATION_MODEL: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+
+            OpenAI:
+                - OPENAI_API_KEY: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
+                - OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
+                - OPENAI_EMBEDDING_MODEL: Model for embeddings (default: "text-embedding-3-small")
+                - OPENAI_GENERATION_MODEL: Model for text generation (e.g., "gpt-4o-mini")
+
+            Ollama:
+                - OLLAMA_BASE_URL: Ollama API base URL (e.g., "http://localhost:11434")
+                - OLLAMA_EMBEDDING_MODEL: Model for embeddings (default: "nomic-embed-text")
+                - OLLAMA_GENERATION_MODEL: Model for text generation (e.g., "llama3.2:1b")
+                - OLLAMA_VERIFY_SSL: Verify SSL certificates (default: "true")
+
+            Simple (no configuration needed, fallback):
+                - SIMPLE_EMBEDDING_DIMENSION: Embedding dimension (default: 384)
+        """
+        # 1. Check for Bedrock
+        aws_region = os.getenv("AWS_REGION")
+        bedrock_embedding_model = os.getenv("BEDROCK_EMBEDDING_MODEL")
+        bedrock_generation_model = os.getenv("BEDROCK_GENERATION_MODEL")
+
+        if aws_region or bedrock_embedding_model or bedrock_generation_model:
+            logger.info(
+                f"Using Bedrock provider: region={aws_region}, "
+                f"embedding_model={bedrock_embedding_model}, "
+                f"generation_model={bedrock_generation_model}"
+            )
+            return BedrockProvider(
+                region_name=aws_region,
+                embedding_model=bedrock_embedding_model,
+                generation_model=bedrock_generation_model,
+                aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+                aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
+            )
+
+        # 2. Check for OpenAI
+        openai_api_key = os.getenv("OPENAI_API_KEY")
+        if openai_api_key:
+            base_url = os.getenv("OPENAI_BASE_URL")
+            embedding_model = os.getenv(
+                "OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
+            )
+            generation_model = os.getenv("OPENAI_GENERATION_MODEL")
+
+            logger.info(
+                f"Using OpenAI provider: base_url={base_url or 'default'}, "
+                f"embedding_model={embedding_model}, "
+                f"generation_model={generation_model}"
+            )
+            return OpenAIProvider(
+                api_key=openai_api_key,
+                base_url=base_url,
+                embedding_model=embedding_model,
+                generation_model=generation_model,
+            )
+
+        # 3. Check for Ollama (local LLM)
+        ollama_url = os.getenv("OLLAMA_BASE_URL")
+        if ollama_url:
+            embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
+            generation_model = os.getenv("OLLAMA_GENERATION_MODEL")
+            verify_ssl = os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true"
+
+            logger.info(
+                f"Using Ollama provider: {ollama_url}, "
+                f"embedding_model={embedding_model}, "
+                f"generation_model={generation_model}"
+            )
+            return OllamaProvider(
+                base_url=ollama_url,
+                embedding_model=embedding_model,
+                generation_model=generation_model,
+                verify_ssl=verify_ssl,
+            )
+
+        # 4. Fallback to Simple provider for development/testing
+        dimension = int(os.getenv("SIMPLE_EMBEDDING_DIMENSION", "384"))
+        logger.warning(
+            "No provider configured (AWS_REGION, OPENAI_API_KEY, OLLAMA_BASE_URL not set). "
+            "Using SimpleProvider for testing/development. "
+            "For production, configure Bedrock, OpenAI, or Ollama."
+        )
+        return SimpleProvider(dimension=dimension)
+
+
+# Singleton instance
+_provider: Provider | None = None
+
+
+def get_provider() -> Provider:
+    """
+    Get singleton provider instance.
+
+    Returns:
+        Global Provider instance (auto-detected on first call)
+    """
+    global _provider
+    if _provider is None:
+        _provider = ProviderRegistry.create_provider()
+    return _provider
+
+
+def reset_provider():
+    """
+    Reset singleton provider instance.
+
+    Useful for testing or reconfiguration.
+    """
+    global _provider
+    _provider = None
@@ -0,0 +1,149 @@
+"""Simple in-process embedding provider for testing.
+
+This provider uses a basic TF-IDF-like approach with feature hashing to generate
+deterministic embeddings without requiring external services. Suitable for testing
+but not for production use.
+"""
+
+import hashlib
+import math
+import re
+from collections import Counter
+
+from .base import Provider
+
+
+class SimpleProvider(Provider):
+    """Simple deterministic embedding provider using feature hashing.
+
+    This implementation:
+    - Tokenizes text into words
+    - Uses feature hashing to map words to fixed-size vectors
+    - Applies TF-IDF-like weighting
+    - Normalizes vectors to unit length
+
+    Not suitable for production but good for testing semantic search infrastructure.
+    Only supports embeddings, not text generation.
+    """
+
+    def __init__(self, dimension: int = 384):
+        """Initialize simple embedding provider.
+
+        Args:
+            dimension: Embedding dimension (default: 384)
+        """
+        self.dimension = dimension
+
+    @property
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        return True
+
+    @property
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        return False
+
+    def _tokenize(self, text: str) -> list[str]:
+        """Tokenize text into lowercase words.
+
+        Args:
+            text: Input text
+
+        Returns:
+            List of lowercase word tokens
+        """
+        # Simple word tokenization
+        text = text.lower()
+        words = re.findall(r"\b\w+\b", text)
+        return words
+
+    def _hash_word(self, word: str) -> int:
+        """Hash word to dimension index.
+
+        Args:
+            word: Word to hash
+
+        Returns:
+            Index in range [0, dimension)
+        """
+        hash_bytes = hashlib.md5(word.encode()).digest()
+        hash_int = int.from_bytes(hash_bytes[:4], byteorder="big")
+        return hash_int % self.dimension
+
+    def _embed_single(self, text: str) -> list[float]:
+        """Generate embedding for single text.
+
+        Args:
+            text: Input text
+
+        Returns:
+            Normalized embedding vector
+        """
+        tokens = self._tokenize(text)
+        if not tokens:
+            return [0.0] * self.dimension
+
+        # Count term frequencies
+        term_freq = Counter(tokens)
+
+        # Initialize vector
+        vector = [0.0] * self.dimension
+
+        # Apply TF weighting with feature hashing
+        for word, count in term_freq.items():
+            idx = self._hash_word(word)
+            # Simple TF weighting: log(1 + count)
+            vector[idx] += math.log1p(count)
+
+        # Normalize to unit length
+        norm = math.sqrt(sum(x * x for x in vector))
+        if norm > 0:
+            vector = [x / norm for x in vector]
+
+        return vector
+
+    async def embed(self, text: str) -> list[float]:
+        """Generate embedding vector for text.
+
+        Args:
+            text: Input text to embed
+
+        Returns:
+            Vector embedding as list of floats
+        """
+        return self._embed_single(text)
+
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Generate embeddings for multiple texts.
+
+        Args:
+            texts: List of texts to embed
+
+        Returns:
+            List of vector embeddings
+        """
+        return [self._embed_single(text) for text in texts]
+
+    def get_dimension(self) -> int:
+        """Get embedding dimension.
+
+        Returns:
+            Vector dimension
+        """
+        return self.dimension
+
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """
+        Generate text from a prompt.
+
+        Raises:
+            NotImplementedError: Simple provider doesn't support text generation
+        """
+        raise NotImplementedError(
+            "Text generation not supported by Simple provider - use Ollama, Anthropic, or Bedrock"
+        )
+
+    async def close(self) -> None:
+        """Close the provider (no-op for simple provider)."""
+        pass
@@ -0,0 +1,27 @@
+"""Search algorithms module for BM25 hybrid search.
+
+This module provides BM25 hybrid search combining:
+- Dense semantic vectors (vector similarity via embeddings)
+- Sparse BM25 vectors (keyword-based retrieval)
+
+Results are fused using Qdrant's native Reciprocal Rank Fusion (RRF) for
+optimal relevance across both semantic and keyword queries.
+"""
+
+from nextcloud_mcp_server.search.algorithms import (
+    NextcloudClientProtocol,
+    SearchAlgorithm,
+    SearchResult,
+    get_indexed_doc_types,
+)
+from nextcloud_mcp_server.search.bm25_hybrid import BM25HybridSearchAlgorithm
+from nextcloud_mcp_server.search.semantic import SemanticSearchAlgorithm
+
+__all__ = [
+    "NextcloudClientProtocol",
+    "SearchAlgorithm",
+    "SearchResult",
+    "get_indexed_doc_types",
+    "SemanticSearchAlgorithm",
+    "BM25HybridSearchAlgorithm",
+]
@@ -0,0 +1,232 @@
+"""Base interfaces and data structures for search algorithms."""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Protocol, runtime_checkable
+
+
+@runtime_checkable
+class NextcloudClientProtocol(Protocol):
+    """Protocol for Nextcloud client supporting multi-document search.
+
+    This protocol defines the interface that search algorithms need from a
+    Nextcloud client to access documents across different apps (Notes, Files,
+    Calendar, etc.). The client provides access to app-specific sub-clients
+    that handle the actual API calls.
+
+    Document types (e.g., "note", "file", "calendar") are NOT 1:1 with apps.
+    For example, the Notes app specializes in markdown files, while Files/WebDAV
+    handles multiple file types. The abstraction is at the document type level.
+
+    Search algorithms query Qdrant to determine which document types are actually
+    indexed before attempting to access them, enabling graceful cross-app search.
+    """
+
+    username: str
+
+    # App-specific clients that search algorithms dispatch to
+    @property
+    def notes(self) -> Any:
+        """Notes client for accessing note documents."""
+        ...
+
+    @property
+    def webdav(self) -> Any:
+        """WebDAV client for accessing file documents."""
+        ...
+
+    @property
+    def calendar(self) -> Any:
+        """Calendar client for accessing event/task documents."""
+        ...
+
+    @property
+    def contacts(self) -> Any:
+        """Contacts client for accessing contact card documents."""
+        ...
+
+    @property
+    def deck(self) -> Any:
+        """Deck client for accessing deck card documents."""
+        ...
+
+    @property
+    def cookbook(self) -> Any:
+        """Cookbook client for accessing recipe documents."""
+        ...
+
+    @property
+    def tables(self) -> Any:
+        """Tables client for accessing table row documents."""
+        ...
+
+
+async def get_indexed_doc_types(user_id: str) -> set[str]:
+    """Query Qdrant to get actually-indexed document types for a user.
+
+    This enables search algorithms to check which document types are available
+    before attempting to search/verify them, allowing graceful cross-app search.
+
+    Args:
+        user_id: User ID to filter by
+
+    Returns:
+        Set of document type strings (e.g., {"note", "file", "calendar"})
+
+    Example:
+        >>> types = await get_indexed_doc_types("alice")
+        >>> if "note" in types:
+        ...     # Search notes
+    """
+    import logging
+
+    from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+    from nextcloud_mcp_server.config import get_settings
+    from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+    from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+    logger = logging.getLogger(__name__)
+    settings = get_settings()
+
+    qdrant_client = await get_qdrant_client()
+    collection = settings.get_collection_name()
+
+    # Use scroll to sample documents and extract doc_types
+    # Note: This could be optimized with a facet/aggregation query if Qdrant adds support
+    try:
+        scroll_results, _next_offset = await qdrant_client.scroll(
+            collection_name=collection,
+            scroll_filter=Filter(
+                must=[
+                    get_placeholder_filter(),  # Exclude placeholders from doc_type discovery
+                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                ]
+            ),
+            limit=1000,  # Sample size to discover types
+            with_payload=["doc_type"],
+            with_vectors=False,  # Don't need vectors for type discovery
+        )
+
+        doc_types = {
+            point.payload.get("doc_type")
+            for point in scroll_results
+            if point.payload.get("doc_type")
+        }
+
+        logger.debug(f"Found indexed document types for user {user_id}: {doc_types}")
+        return doc_types
+
+    except Exception as e:
+        logger.warning(f"Failed to query Qdrant for doc_types: {e}")
+        return set()
+
+
+@dataclass
+class SearchResult:
+    """A single search result with metadata and score.
+
+    Attributes:
+        id: Document ID (int for all document types)
+        doc_type: Document type (note, file, calendar, contact, etc.)
+        title: Document title
+        excerpt: Content excerpt showing match context
+        score: Relevance score (≥ 0.0, higher is better)
+            - RRF fusion: scores in [0.0, 1.0]
+            - DBSF fusion: scores can exceed 1.0 (sum of normalized scores)
+        metadata: Additional algorithm-specific metadata
+        chunk_start_offset: Character position where chunk starts (None if not available)
+        chunk_end_offset: Character position where chunk ends (None if not available)
+        page_number: Page number for PDF documents (None for other doc types)
+        chunk_index: Zero-based index of this chunk in the document
+        total_chunks: Total number of chunks in the document
+        point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
+    """
+
+    id: int
+    doc_type: str
+    title: str
+    excerpt: str
+    score: float
+    metadata: dict[str, Any] | None = None
+    chunk_start_offset: int | None = None
+    chunk_end_offset: int | None = None
+    page_number: int | None = None
+    chunk_index: int = 0
+    total_chunks: int = 1
+    point_id: str | None = None
+
+    def __post_init__(self):
+        """Validate score is non-negative.
+
+        Note: Different fusion methods produce different score ranges:
+        - RRF (Reciprocal Rank Fusion): Bounded to [0.0, 1.0]
+        - DBSF (Distribution-Based Score Fusion): Unbounded (can exceed 1.0)
+          DBSF sums normalized scores from multiple systems, so scores can be
+          1.5, 2.0, etc. when multiple systems agree a document is highly relevant.
+        """
+        if self.score < 0.0:
+            raise ValueError(f"Score must be non-negative, got {self.score}")
+
+
+class SearchAlgorithm(ABC):
+    """Abstract base class for search algorithms.
+
+    All search algorithms must implement the search() method with consistent
+    interface, allowing them to be used interchangeably.
+
+    Attributes:
+        query_embedding: The query embedding generated during the last search.
+            Available after search() completes for algorithms that use embeddings.
+            Can be reused by callers to avoid redundant embedding generation.
+    """
+
+    query_embedding: list[float] | None = None
+
+    @abstractmethod
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """Execute search with the given parameters.
+
+        Args:
+            query: Search query string
+            user_id: User ID for multi-tenant filtering
+            limit: Maximum number of results to return
+            doc_type: Optional document type filter (note, file, calendar, etc.)
+            **kwargs: Algorithm-specific parameters
+
+        Returns:
+            List of SearchResult objects ranked by relevance
+
+        Raises:
+            McpError: If search fails or configuration is invalid
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Return algorithm name for identification."""
+        pass
+
+    @property
+    def supports_scoring(self) -> bool:
+        """Whether this algorithm provides meaningful relevance scores.
+
+        Default: True. Override if algorithm doesn't support scoring.
+        """
+        return True
+
+    @property
+    def requires_vector_db(self) -> bool:
+        """Whether this algorithm requires vector database.
+
+        Default: False. Override for semantic search.
+        """
+        return False
@@ -0,0 +1,253 @@
+"""BM25 hybrid search algorithm using Qdrant native RRF fusion."""
+
+import logging
+from typing import Any
+
+from qdrant_client import models
+from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.embedding import get_bm25_service, get_embedding_service
+from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
+from nextcloud_mcp_server.observability.tracing import trace_operation
+from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+class BM25HybridSearchAlgorithm(SearchAlgorithm):
+    """
+    Hybrid search combining dense semantic vectors with BM25 sparse vectors.
+
+    Uses Qdrant's native Reciprocal Rank Fusion (RRF) to automatically merge
+    results from both dense (semantic) and sparse (BM25 keyword) searches.
+    This provides the best of both worlds: semantic understanding for conceptual
+    queries and precise keyword matching for specific terms, acronyms, and codes.
+
+    The fusion happens efficiently in the database using the prefetch mechanism,
+    eliminating the need for application-layer result merging.
+    """
+
+    def __init__(self, score_threshold: float = 0.0, fusion: str = "rrf"):
+        """
+        Initialize BM25 hybrid search algorithm.
+
+        Args:
+            score_threshold: Minimum fusion score (0-1, default: 0.0 to allow fusion scoring)
+                           Note: Both RRF and DBSF produce normalized scores
+            fusion: Fusion algorithm to use: "rrf" (Reciprocal Rank Fusion, default)
+                   or "dbsf" (Distribution-Based Score Fusion)
+
+        Raises:
+            ValueError: If fusion is not "rrf" or "dbsf"
+        """
+        if fusion not in ("rrf", "dbsf"):
+            raise ValueError(
+                f"Invalid fusion algorithm '{fusion}'. Must be 'rrf' or 'dbsf'"
+            )
+
+        self.score_threshold = score_threshold
+        self.fusion = models.Fusion.RRF if fusion == "rrf" else models.Fusion.DBSF
+        self.fusion_name = fusion
+
+    @property
+    def name(self) -> str:
+        return "bm25_hybrid"
+
+    @property
+    def requires_vector_db(self) -> bool:
+        return True
+
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """
+        Execute hybrid search using dense + sparse vectors with native RRF fusion.
+
+        Returns unverified results from Qdrant. Access verification should be
+        performed separately at the final output stage using verify_search_results().
+
+        Deduplicates by (doc_id, doc_type, chunk_start_offset, chunk_end_offset)
+        to show multiple chunks from the same document while avoiding duplicate chunks.
+
+        Args:
+            query: Natural language or keyword search query
+            user_id: User ID for filtering
+            limit: Maximum results to return
+            doc_type: Optional document type filter
+            **kwargs: Additional parameters (score_threshold override)
+
+        Returns:
+            List of unverified SearchResult objects ranked by RRF fusion score
+
+        Raises:
+            McpError: If vector sync is not enabled or search fails
+        """
+        settings = get_settings()
+        score_threshold = kwargs.get("score_threshold", self.score_threshold)
+
+        logger.info(
+            f"BM25 hybrid search: query='{query}', user={user_id}, "
+            f"limit={limit}, score_threshold={score_threshold}, doc_type={doc_type}, "
+            f"fusion={self.fusion_name}"
+        )
+
+        # Generate dense embedding for semantic search
+        with trace_operation("search.get_embedding_service"):
+            embedding_service = get_embedding_service()
+        with trace_operation("search.dense_embedding"):
+            dense_embedding = await embedding_service.embed(query)
+        # Store for reuse by callers (e.g., viz_routes PCA visualization)
+        self.query_embedding = dense_embedding
+        logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
+
+        # Generate sparse embedding for BM25 keyword search
+        with trace_operation("search.get_bm25_service"):
+            bm25_service = get_bm25_service()
+        with trace_operation("search.sparse_embedding_bm25"):
+            sparse_embedding = await bm25_service.encode_async(query)
+        logger.debug(
+            f"Generated sparse embedding "
+            f"({len(sparse_embedding['indices'])} non-zero terms)"
+        )
+
+        # Build Qdrant filter
+        filter_conditions = [
+            get_placeholder_filter(),  # Always exclude placeholders from user-facing queries
+            FieldCondition(
+                key="user_id",
+                match=MatchValue(value=user_id),
+            ),
+        ]
+
+        # Add doc_type filter if specified
+        if doc_type:
+            filter_conditions.append(
+                FieldCondition(
+                    key="doc_type",
+                    match=MatchValue(value=doc_type),
+                )
+            )
+
+        query_filter = Filter(must=filter_conditions)
+
+        # Execute hybrid search with Qdrant native RRF fusion
+        with trace_operation("search.get_qdrant_client"):
+            qdrant_client = await get_qdrant_client()
+
+        try:
+            # Use prefetch to run both dense and sparse searches
+            # Qdrant will automatically merge results using RRF
+            with trace_operation(
+                "search.qdrant_query",
+                attributes={"query.limit": limit * 2, "query.fusion": self.fusion_name},
+            ):
+                search_response = await qdrant_client.query_points(
+                    collection_name=settings.get_collection_name(),
+                    prefetch=[
+                        # Dense semantic search
+                        models.Prefetch(
+                            query=dense_embedding,
+                            using="dense",
+                            limit=limit * 2,  # Get extra for deduplication
+                            filter=query_filter,
+                        ),
+                        # Sparse BM25 search
+                        models.Prefetch(
+                            query=models.SparseVector(
+                                indices=sparse_embedding["indices"],
+                                values=sparse_embedding["values"],
+                            ),
+                            using="sparse",
+                            limit=limit * 2,  # Get extra for deduplication
+                            filter=query_filter,
+                        ),
+                    ],
+                    # Fusion query (RRF or DBSF based on initialization)
+                    query=models.FusionQuery(fusion=self.fusion),
+                    limit=limit * 2,  # Get extra for deduplication
+                    score_threshold=score_threshold,
+                    with_payload=True,
+                    with_vectors=False,  # Don't return vectors to save bandwidth
+                )
+            record_qdrant_operation("search", "success")
+        except Exception:
+            record_qdrant_operation("search", "error")
+            raise
+
+        logger.info(
+            f"Qdrant {self.fusion_name.upper()} fusion returned {len(search_response.points)} results "
+            f"(before deduplication)"
+        )
+
+        if search_response.points:
+            # Log top 3 fusion scores to help with threshold tuning
+            top_scores = [p.score for p in search_response.points[:3]]
+            logger.debug(
+                f"Top 3 {self.fusion_name.upper()} fusion scores: {top_scores}"
+            )
+
+        # Deduplicate by (doc_id, doc_type, chunk_start, chunk_end)
+        # This allows multiple chunks from same doc, but removes duplicate chunks
+        with trace_operation(
+            "search.deduplicate",
+            attributes={"dedupe.num_points": len(search_response.points)},
+        ):
+            seen_chunks = set()
+            results = []
+
+            for result in search_response.points:
+                # doc_id can be int (notes) or str (files - file paths)
+                doc_id = result.payload["doc_id"]
+                doc_type = result.payload.get("doc_type", "note")
+                chunk_start = result.payload.get("chunk_start_offset")
+                chunk_end = result.payload.get("chunk_end_offset")
+                chunk_key = (doc_id, doc_type, chunk_start, chunk_end)
+
+                # Skip if we've already seen this exact chunk
+                if chunk_key in seen_chunks:
+                    continue
+
+                seen_chunks.add(chunk_key)
+
+                # Return unverified results (verification happens at output stage)
+                results.append(
+                    SearchResult(
+                        id=doc_id,
+                        doc_type=doc_type,
+                        title=result.payload.get("title", "Untitled"),
+                        excerpt=result.payload.get("excerpt", ""),
+                        score=result.score,  # Fusion score (RRF or DBSF)
+                        metadata={
+                            "chunk_index": result.payload.get("chunk_index"),
+                            "total_chunks": result.payload.get("total_chunks"),
+                            "search_method": f"bm25_hybrid_{self.fusion_name}",
+                        },
+                        chunk_start_offset=result.payload.get("chunk_start_offset"),
+                        chunk_end_offset=result.payload.get("chunk_end_offset"),
+                        page_number=result.payload.get("page_number"),
+                        chunk_index=result.payload.get("chunk_index", 0),
+                        total_chunks=result.payload.get("total_chunks", 1),
+                        point_id=str(result.id),  # Qdrant point ID for batch retrieval
+                    )
+                )
+
+                if len(results) >= limit:
+                    break
+
+        logger.info(f"Returning {len(results)} unverified results after deduplication")
+        if results:
+            result_details = [
+                f"{r.doc_type}_{r.id} (score={r.score:.3f}, title='{r.title}')"
+                for r in results[:5]  # Show top 5
+            ]
+            logger.debug(f"Top results: {', '.join(result_details)}")
+
+        return results
@@ -0,0 +1,598 @@
+"""Context expansion for search results.
+
+Provides utilities to expand matched chunks with surrounding context and
+position markers for better visualization and understanding of search results.
+"""
+
+import logging
+from dataclasses import dataclass
+
+from nextcloud_mcp_server.client import NextcloudClient
+
+logger = logging.getLogger(__name__)
+
+
+async def _get_chunk_from_qdrant(
+    user_id: str, doc_id: int, doc_type: str, chunk_start: int, chunk_end: int
+) -> str | None:
+    """Retrieve full chunk text from Qdrant payload.
+
+    This avoids re-fetching and re-parsing documents by using the cached
+    chunk content already stored in Qdrant.
+
+    Args:
+        user_id: User ID who owns the document
+        doc_id: Document ID
+        doc_type: Document type (e.g., "note", "file")
+        chunk_start: Character offset where chunk starts
+        chunk_end: Character offset where chunk ends
+
+    Returns:
+        Full chunk text from Qdrant excerpt field, or None if not found
+    """
+    try:
+        from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+        from nextcloud_mcp_server.config import get_settings
+        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+        qdrant_client = await get_qdrant_client()
+        settings = get_settings()
+
+        # Query for the specific chunk
+        scroll_result = await qdrant_client.scroll(
+            collection_name=settings.get_collection_name(),
+            scroll_filter=Filter(
+                must=[
+                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                    FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
+                    FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
+                    FieldCondition(
+                        key="chunk_start_offset", match=MatchValue(value=chunk_start)
+                    ),
+                    FieldCondition(
+                        key="chunk_end_offset", match=MatchValue(value=chunk_end)
+                    ),
+                ]
+            ),
+            limit=1,
+            with_payload=["excerpt"],
+            with_vectors=False,
+        )
+
+        if scroll_result[0]:
+            point = scroll_result[0][0]
+            excerpt = point.payload.get("excerpt")
+            if excerpt:
+                logger.debug(
+                    f"Retrieved chunk from Qdrant for {doc_type} {doc_id}: "
+                    f"{len(excerpt)} chars"
+                )
+                return str(excerpt)
+
+        logger.debug(
+            f"Chunk not found in Qdrant for {doc_type} {doc_id}, "
+            f"chunk [{chunk_start}:{chunk_end}]. Will fall back to document fetch."
+        )
+        return None
+
+    except Exception as e:
+        logger.error(
+            f"Error querying Qdrant for chunk: {e}. Falling back to document fetch.",
+            exc_info=True,
+        )
+        return None
+
+
+async def _get_chunk_by_index_from_qdrant(
+    user_id: str, doc_id: int, doc_type: str, chunk_index: int
+) -> str | None:
+    """Retrieve chunk text by chunk_index from Qdrant payload.
+
+    Used to fetch adjacent chunks for context expansion.
+
+    Args:
+        user_id: User ID who owns the document
+        doc_id: Document ID
+        doc_type: Document type (e.g., "note", "file")
+        chunk_index: Zero-based chunk index in document
+
+    Returns:
+        Full chunk text from Qdrant excerpt field, or None if not found
+    """
+    try:
+        from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+        from nextcloud_mcp_server.config import get_settings
+        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+        qdrant_client = await get_qdrant_client()
+        settings = get_settings()
+
+        # Query for chunk by index
+        scroll_result = await qdrant_client.scroll(
+            collection_name=settings.get_collection_name(),
+            scroll_filter=Filter(
+                must=[
+                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                    FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
+                    FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
+                    FieldCondition(
+                        key="chunk_index", match=MatchValue(value=chunk_index)
+                    ),
+                ]
+            ),
+            limit=1,
+            with_payload=["excerpt"],
+            with_vectors=False,
+        )
+
+        if scroll_result[0]:
+            point = scroll_result[0][0]
+            excerpt = point.payload.get("excerpt")
+            if excerpt:
+                logger.debug(
+                    f"Retrieved adjacent chunk {chunk_index} from Qdrant for "
+                    f"{doc_type} {doc_id}: {len(excerpt)} chars"
+                )
+                return str(excerpt)
+
+        return None
+
+    except Exception as e:
+        logger.debug(
+            f"Could not retrieve adjacent chunk {chunk_index} for "
+            f"{doc_type} {doc_id}: {e}"
+        )
+        return None
+
+
+async def _get_file_path_from_qdrant(
+    user_id: str, file_id: int, chunk_start: int, chunk_end: int
+) -> str | None:
+    """Resolve file_id to file_path by querying Qdrant payload.
+
+    Args:
+        user_id: User ID who owns the file
+        file_id: Numeric file ID
+        chunk_start: Character offset where chunk starts
+        chunk_end: Character offset where chunk ends
+
+    Returns:
+        File path string, or None if not found in Qdrant
+    """
+    try:
+        from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+        from nextcloud_mcp_server.config import get_settings
+        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+        qdrant_client = await get_qdrant_client()
+        settings = get_settings()
+
+        # Query for the specific chunk
+        scroll_result = await qdrant_client.scroll(
+            collection_name=settings.get_collection_name(),
+            scroll_filter=Filter(
+                must=[
+                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                    FieldCondition(key="doc_id", match=MatchValue(value=file_id)),
+                    FieldCondition(key="doc_type", match=MatchValue(value="file")),
+                    FieldCondition(
+                        key="chunk_start_offset", match=MatchValue(value=chunk_start)
+                    ),
+                    FieldCondition(
+                        key="chunk_end_offset", match=MatchValue(value=chunk_end)
+                    ),
+                ]
+            ),
+            limit=1,
+            with_payload=["file_path"],
+            with_vectors=False,
+        )
+
+        if scroll_result[0]:
+            point = scroll_result[0][0]
+            file_path = point.payload.get("file_path")
+            if file_path:
+                logger.debug(f"Resolved file_id {file_id} to file_path {file_path}")
+                return str(file_path)
+
+        logger.warning(
+            f"Could not find file_path in Qdrant for file_id {file_id}, "
+            f"chunk [{chunk_start}:{chunk_end}]"
+        )
+        return None
+
+    except Exception as e:
+        logger.error(f"Error querying Qdrant for file_path: {e}", exc_info=True)
+        return None
+
+
+@dataclass
+class ChunkContext:
+    """Expanded chunk with surrounding context and position markers.
+
+    Attributes:
+        chunk_text: The matched chunk text
+        before_context: Text before the chunk (up to context_chars)
+        after_context: Text after the chunk (up to context_chars)
+        chunk_start_offset: Character position where chunk starts in document
+        chunk_end_offset: Character position where chunk ends in document
+        page_number: Page number for PDFs (None for other doc types)
+        chunk_index: Zero-based chunk index (N in "chunk N of M")
+        total_chunks: Total number of chunks in document
+        marked_text: Full text with position markers around the chunk
+        has_before_truncation: True if before_context was truncated
+        has_after_truncation: True if after_context was truncated
+    """
+
+    chunk_text: str
+    before_context: str
+    after_context: str
+    chunk_start_offset: int
+    chunk_end_offset: int
+    page_number: int | None
+    chunk_index: int
+    total_chunks: int
+    marked_text: str
+    has_before_truncation: bool
+    has_after_truncation: bool
+
+
+async def get_chunk_with_context(
+    nc_client: NextcloudClient,
+    user_id: str,
+    doc_id: str | int,
+    doc_type: str,
+    chunk_start: int,
+    chunk_end: int,
+    page_number: int | None = None,
+    chunk_index: int = 0,
+    total_chunks: int = 1,
+    context_chars: int = 300,
+) -> ChunkContext | None:
+    """Fetch chunk with surrounding context.
+
+    First tries to retrieve the chunk from Qdrant (fast, cached). If that fails
+    (e.g., legacy data with truncated excerpts), falls back to fetching and
+    parsing the full document (slower, for PDFs especially).
+
+    Args:
+        nc_client: Authenticated Nextcloud client
+        user_id: User ID who owns the document
+        doc_id: Document ID (int for notes/files)
+        doc_type: Type of document ("note", "file", etc.)
+        chunk_start: Character offset where chunk starts
+        chunk_end: Character offset where chunk ends
+        page_number: Optional page number for PDFs
+        chunk_index: Zero-based chunk index in document
+        total_chunks: Total number of chunks in document
+        context_chars: Number of characters to include before/after chunk
+
+    Returns:
+        ChunkContext with expanded context and markers, or None if document
+        cannot be retrieved
+    """
+    # Convert doc_id to int for Qdrant query
+    doc_id_int = (
+        int(doc_id)
+        if isinstance(doc_id, str) and doc_id.isdigit()
+        else (doc_id if isinstance(doc_id, int) else None)
+    )
+
+    # Try to get chunk from Qdrant first (fast path)
+    if doc_id_int is not None:
+        chunk_text = await _get_chunk_from_qdrant(
+            user_id, doc_id_int, doc_type, chunk_start, chunk_end
+        )
+        if chunk_text:
+            logger.info(
+                f"Retrieved chunk from Qdrant cache for {doc_type} {doc_id} "
+                f"(avoids document re-fetch/re-parse)"
+            )
+
+            # Fetch adjacent chunks for context expansion
+            # Get chunk overlap from config to remove duplicate text
+            from nextcloud_mcp_server.config import get_settings
+
+            settings = get_settings()
+            chunk_overlap = settings.document_chunk_overlap
+
+            before_context = ""
+            after_context = ""
+            has_before_truncation = False
+            has_after_truncation = False
+
+            # Fetch previous chunk if not first chunk
+            if chunk_index > 0:
+                before_chunk = await _get_chunk_by_index_from_qdrant(
+                    user_id, doc_id_int, doc_type, chunk_index - 1
+                )
+                if before_chunk:
+                    # Remove overlap: the last chunk_overlap chars of previous chunk
+                    # overlap with the first chunk_overlap chars of current chunk
+                    before_context = (
+                        before_chunk[:-chunk_overlap]
+                        if len(before_chunk) > chunk_overlap
+                        else ""
+                    )
+                    # Truncate if requested context_chars < remaining length
+                    if before_context and len(before_context) > context_chars:
+                        before_context = before_context[-context_chars:]
+                        has_before_truncation = True
+                else:
+                    # Could not fetch previous chunk, but we're not at start
+                    has_before_truncation = True
+
+            # Fetch next chunk if not last chunk
+            if chunk_index < total_chunks - 1:
+                after_chunk = await _get_chunk_by_index_from_qdrant(
+                    user_id, doc_id_int, doc_type, chunk_index + 1
+                )
+                if after_chunk:
+                    # Remove overlap: the first chunk_overlap chars of next chunk
+                    # overlap with the last chunk_overlap chars of current chunk
+                    after_context = (
+                        after_chunk[chunk_overlap:]
+                        if len(after_chunk) > chunk_overlap
+                        else ""
+                    )
+                    # Truncate if requested context_chars < remaining length
+                    if after_context and len(after_context) > context_chars:
+                        after_context = after_context[:context_chars]
+                        has_after_truncation = True
+                else:
+                    # Could not fetch next chunk, but we're not at end
+                    has_after_truncation = True
+
+            marked_text = _insert_position_markers(
+                before_context=before_context,
+                chunk_text=chunk_text,
+                after_context=after_context,
+                page_number=page_number,
+                chunk_index=chunk_index,
+                total_chunks=total_chunks,
+                has_before_truncation=has_before_truncation,
+                has_after_truncation=has_after_truncation,
+            )
+            return ChunkContext(
+                chunk_text=chunk_text,
+                before_context=before_context,
+                after_context=after_context,
+                chunk_start_offset=chunk_start,
+                chunk_end_offset=chunk_end,
+                page_number=page_number,
+                chunk_index=chunk_index,
+                total_chunks=total_chunks,
+                marked_text=marked_text,
+                has_before_truncation=has_before_truncation,
+                has_after_truncation=has_after_truncation,
+            )
+
+    # Fallback: Fetch full document and extract chunk with context
+    # This path is taken for:
+    # 1. Legacy data with truncated excerpts in Qdrant
+    # 2. Failed Qdrant queries
+    logger.info(
+        f"Falling back to document fetch for {doc_type} {doc_id} "
+        f"(Qdrant cache miss, possibly legacy data)"
+    )
+
+    # For files, retrieve file_path from Qdrant payload
+    resolved_doc_id = doc_id
+    if doc_type == "file" and isinstance(doc_id, int):
+        file_path = await _get_file_path_from_qdrant(
+            user_id, doc_id, chunk_start, chunk_end
+        )
+        if not file_path:
+            logger.warning(
+                f"Could not resolve file_id {doc_id} to file_path from Qdrant"
+            )
+            return None
+        resolved_doc_id = file_path
+        logger.debug(f"Resolved file_id {doc_id} to file_path {file_path}")
+
+    # Fetch full document text
+    full_text = await _fetch_document_text(nc_client, resolved_doc_id, doc_type)
+    if full_text is None:
+        logger.warning(
+            f"Could not fetch document text for {doc_type} {doc_id}, "
+            "skipping context expansion"
+        )
+        return None
+
+    # Validate offsets
+    if chunk_start < 0 or chunk_end > len(full_text) or chunk_start >= chunk_end:
+        logger.warning(
+            f"Invalid chunk offsets for {doc_type} {doc_id}: "
+            f"start={chunk_start}, end={chunk_end}, doc_len={len(full_text)}"
+        )
+        return None
+
+    # Extract chunk text
+    chunk_text = full_text[chunk_start:chunk_end]
+
+    # Calculate context boundaries
+    context_start = max(0, chunk_start - context_chars)
+    context_end = min(len(full_text), chunk_end + context_chars)
+
+    # Extract context
+    before_context = full_text[context_start:chunk_start]
+    after_context = full_text[chunk_end:context_end]
+
+    # Check for truncation
+    has_before_truncation = context_start > 0
+    has_after_truncation = context_end < len(full_text)
+
+    # Create marked text with position markers
+    marked_text = _insert_position_markers(
+        before_context=before_context,
+        chunk_text=chunk_text,
+        after_context=after_context,
+        page_number=page_number,
+        chunk_index=chunk_index,
+        total_chunks=total_chunks,
+        has_before_truncation=has_before_truncation,
+        has_after_truncation=has_after_truncation,
+    )
+
+    return ChunkContext(
+        chunk_text=chunk_text,
+        before_context=before_context,
+        after_context=after_context,
+        chunk_start_offset=chunk_start,
+        chunk_end_offset=chunk_end,
+        page_number=page_number,
+        chunk_index=chunk_index,
+        total_chunks=total_chunks,
+        marked_text=marked_text,
+        has_before_truncation=has_before_truncation,
+        has_after_truncation=has_after_truncation,
+    )
+
+
+async def _fetch_document_text(
+    nc_client: NextcloudClient, doc_id: str | int, doc_type: str
+) -> str | None:
+    """Fetch full text content of a document.
+
+    Args:
+        nc_client: Authenticated Nextcloud client
+        doc_id: Document ID (note ID or file path)
+        doc_type: Type of document ("note", "file", etc.)
+
+    Returns:
+        Full document text, or None if document cannot be retrieved
+    """
+    try:
+        if doc_type == "note":
+            # Fetch note by ID
+            note = await nc_client.notes.get_note(note_id=int(doc_id))
+            # Reconstruct full content as indexed: title + "\n\n" + content
+            # This ensures chunk offsets align with indexed content structure
+            title = note.get("title", "")
+            content = note.get("content", "")
+            return f"{title}\n\n{content}"
+        elif doc_type == "file":
+            # Fetch file content via WebDAV
+            try:
+                file_path = str(doc_id)
+                file_content, content_type = await nc_client.webdav.read_file(file_path)
+
+                # Check if it's a PDF (by content type or file extension)
+                is_pdf = (
+                    content_type and "pdf" in content_type.lower()
+                ) or file_path.lower().endswith(".pdf")
+
+                if is_pdf:
+                    # Extract text from PDF using PyMuPDF
+                    # IMPORTANT: Use pymupdf4llm.to_markdown() to match indexing extraction
+                    # This ensures character offsets align between indexed chunks and retrieval
+                    import pymupdf
+                    import pymupdf4llm
+
+                    logger.debug(f"Extracting text from PDF: {file_path}")
+                    pdf_doc = pymupdf.open(stream=file_content, filetype="pdf")
+                    text_parts = []
+
+                    # Extract each page as markdown (same as indexing)
+                    for page_num in range(pdf_doc.page_count):
+                        page_md = pymupdf4llm.to_markdown(
+                            pdf_doc,
+                            pages=[page_num],
+                            write_images=False,  # Don't need images for context
+                            page_chunks=False,
+                        )
+                        text_parts.append(page_md)
+
+                    pdf_doc.close()
+
+                    # Join pages (no separator - matches indexing)
+                    full_text = "".join(text_parts)
+                    logger.debug(
+                        f"Extracted {len(full_text)} characters from "
+                        f"{pdf_doc.page_count} pages in {file_path}"
+                    )
+                    return full_text
+                else:
+                    # Assume it's a text file, decode to string
+                    logger.debug(f"Decoding text file: {file_path}")
+                    return file_content.decode("utf-8", errors="replace")
+            except Exception as e:
+                logger.error(
+                    f"Error fetching file content for {doc_id}: {e}", exc_info=True
+                )
+                return None
+        else:
+            logger.warning(f"Unsupported doc_type for context expansion: {doc_type}")
+            return None
+    except Exception as e:
+        logger.error(f"Error fetching document {doc_type} {doc_id}: {e}", exc_info=True)
+        return None
+
+
+def _insert_position_markers(
+    before_context: str,
+    chunk_text: str,
+    after_context: str,
+    page_number: int | None,
+    chunk_index: int,
+    total_chunks: int,
+    has_before_truncation: bool,
+    has_after_truncation: bool,
+) -> str:
+    """Insert position markers around matched chunk.
+
+    Creates markdown-formatted text with visual markers indicating chunk
+    boundaries and metadata.
+
+    Args:
+        before_context: Text before chunk
+        chunk_text: The matched chunk
+        after_context: Text after chunk
+        page_number: Optional page number
+        chunk_index: Zero-based chunk index
+        total_chunks: Total chunks in document
+        has_before_truncation: Whether before_context is truncated
+        has_after_truncation: Whether after_context is truncated
+
+    Returns:
+        Formatted text with position markers
+    """
+    # Build position metadata
+    position_parts = []
+    if page_number is not None:
+        position_parts.append(f"Page {page_number}")
+    position_parts.append(f"Chunk {chunk_index + 1} of {total_chunks}")
+    position_metadata = ", ".join(position_parts)
+
+    # Build marked text
+    parts = []
+
+    # Add truncation indicator for before context
+    if has_before_truncation:
+        parts.append("**[...]**\n\n")
+
+    # Add before context if present
+    if before_context:
+        parts.append(before_context)
+
+    # Add chunk start marker
+    parts.append(f"\n\n🔍 **MATCHED CHUNK START** ({position_metadata})\n\n")
+
+    # Add chunk text
+    parts.append(chunk_text)
+
+    # Add chunk end marker
+    parts.append("\n\n🔍 **MATCHED CHUNK END**\n\n")
+
+    # Add after context if present
+    if after_context:
+        parts.append(after_context)
+
+    # Add truncation indicator for after context
+    if has_after_truncation:
+        parts.append("\n\n**[...]**")
+
+    return "".join(parts)
@@ -0,0 +1,907 @@
+"""PDF chunk highlighting utilities for vector visualization.
+
+This module provides utilities to generate highlighted page images showing
+matched chunks and their context from semantic search results.
+
+The highlighting uses character offsets to precisely locate chunks within
+PDF documents, ensuring accurate highlighting even when text formatting
+varies between indexing and rendering.
+"""
+
+import logging
+import re
+from typing import Optional
+
+import pymupdf
+import pymupdf4llm
+
+logger = logging.getLogger(__name__)
+
+
+class PDFHighlighter:
+    """Generate highlighted page images from PDF chunks."""
+
+    # Color definitions (RGB, 0-1 range)
+    COLORS = {
+        "yellow": [1, 1, 0],
+        "red": [1, 0, 0],
+        "green": [0, 1, 0],
+        "blue": [0, 0, 1],
+        "orange": [1, 0.5, 0],
+        "pink": [1, 0, 1],
+        "gray": [0.7, 0.7, 0.7],
+        "light_blue": [0.7, 0.9, 1.0],
+        "light_green": [0.7, 1.0, 0.7],
+    }
+
+    @staticmethod
+    def strip_markdown(text: str) -> str:
+        """Remove inline markdown formatting to improve search accuracy.
+
+        Strips bold/italic delimiters (``**``, ``*``, ``__``, ``_``), leading
+        header hashes and inline-code backticks, then trims surrounding
+        whitespace.
+
+        Underscore emphasis is only removed when the delimiters are not
+        flanked by word characters. Intraword underscores are not emphasis in
+        markdown, so identifiers such as ``max_chunk_size`` keep their
+        underscores instead of collapsing to ``maxchunksize`` (which would no
+        longer match the text found in the PDF).
+
+        Args:
+            text: Text with potential markdown formatting
+
+        Returns:
+            Plain text with markdown removed
+        """
+        # Asterisk emphasis: bold first so "**x**" is not half-consumed by
+        # the single-asterisk rule
+        text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
+        text = re.sub(r"\*(.+?)\*", r"\1", text)
+
+        # Underscore emphasis: delimiters must sit at a word boundary so that
+        # snake_case identifiers are left untouched
+        text = re.sub(r"(?<!\w)__(.+?)__(?!\w)", r"\1", text)
+        text = re.sub(r"(?<!\w)_(.+?)_(?!\w)", r"\1", text)
+
+        # Remove headers (leading "#" runs at the start of any line)
+        text = re.sub(r"^#+\s+", "", text, flags=re.MULTILINE)
+
+        # Remove inline code backticks, keeping the code text itself
+        text = re.sub(r"`(.+?)`", r"\1", text)
+
+        return text.strip()
+
+    @staticmethod
+    def extract_pdf_text_with_boundaries(
+        pdf_doc: pymupdf.Document,
+    ) -> tuple[str, list[dict]]:
+        """Extract full document text with page boundary tracking.
+
+        Converts the document to markdown one page at a time with
+        pymupdf4llm.to_markdown() and records, for every page, the character
+        range it occupies in the concatenated text.
+
+        IMPORTANT: Must use write_images=True to match PyMuPDFProcessor behavior!
+        Even though we don't need the images, we need the image references in the
+        markdown text to maintain consistent character offsets with indexing.
+
+        Images are written to a temporary directory that is removed before
+        returning, including when extraction raises.
+
+        Args:
+            pdf_doc: Open PyMuPDF document
+
+        Returns:
+            Tuple of (full_text, page_boundaries) where page_boundaries is a list of:
+            {"page": 1, "start_offset": 0, "end_offset": 1234}
+
+        Raises:
+            Exception: Anything raised by pymupdf4llm.to_markdown() propagates
+                to the caller after the temporary directory has been cleaned up.
+        """
+        import shutil
+        import tempfile
+        from pathlib import Path
+
+        page_boundaries: list[dict] = []
+        text_parts: list[str] = []
+        current_offset = 0
+
+        # Use temp directory for image output (images are discarded after extraction)
+        temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_"))
+
+        try:
+            for page_idx in range(pdf_doc.page_count):
+                page_md = pymupdf4llm.to_markdown(
+                    pdf_doc,
+                    pages=[page_idx],
+                    write_images=True,  # Must match indexing! Otherwise offsets misalign
+                    image_path=temp_dir,
+                    page_chunks=False,
+                )
+
+                page_boundaries.append(
+                    {
+                        "page": page_idx + 1,  # 1-indexed
+                        "start_offset": current_offset,
+                        "end_offset": current_offset + len(page_md),
+                    }
+                )
+
+                text_parts.append(page_md)
+                current_offset += len(page_md)
+        finally:
+            # Best-effort cleanup that also runs when a page fails to convert,
+            # so a bad PDF does not leave extracted images behind on disk
+            try:
+                shutil.rmtree(temp_dir)
+            except Exception as e:
+                logger.warning(f"Failed to clean up temp directory {temp_dir}: {e}")
+
+        return "".join(text_parts), page_boundaries
+
+    @staticmethod
+    def find_chunk_page(
+        chunk_start_offset: int,
+        chunk_end_offset: int,
+        page_boundaries: list[dict],
+    ) -> Optional[dict]:
+        """Pick the page that holds the largest share of a chunk.
+
+        Args:
+            chunk_start_offset: Chunk start position in full document
+            chunk_end_offset: Chunk end position in full document
+            page_boundaries: Page boundary list from extract_pdf_text_with_boundaries()
+
+        Returns:
+            Dict with keys: page_num, overlap_chars, page_relative_start, page_relative_end
+            for the page with the greatest overlap (the earliest page wins a tie),
+            or None if the chunk overlaps no page
+        """
+        best_page: Optional[dict] = None
+
+        for boundary in page_boundaries:
+            page_start = boundary["start_offset"]
+            page_end = boundary["end_offset"]
+
+            # Skip pages whose character span does not intersect the chunk span
+            if chunk_start_offset >= page_end or chunk_end_offset <= page_start:
+                continue
+
+            overlap_start = max(chunk_start_offset, page_start)
+            overlap_end = min(chunk_end_offset, page_end)
+
+            candidate = {
+                "page_num": boundary["page"],
+                "overlap_chars": overlap_end - overlap_start,
+                "page_relative_start": overlap_start - page_start,
+                "page_relative_end": overlap_end - page_start,
+            }
+
+            # Strict ">" keeps the earliest page when overlaps are equal
+            if (
+                best_page is None
+                or candidate["overlap_chars"] > best_page["overlap_chars"]
+            ):
+                best_page = candidate
+
+        return best_page
+
+    @staticmethod
+    def highlight_chunk_by_word_positions(
+        page: pymupdf.Page,
+        chunk_text: str,
+        color: str = "yellow",
+        search_region: tuple[float, float, float, float] | None = None,
+    ) -> int:
+        """Highlight chunk using word-position matching.
+
+        This method matches words from the chunk to their positions on the PDF page,
+        avoiding text search mismatches between markdown-formatted text and raw PDF text.
+
+        Matching works as follows:
+        1. The chunk is stripped of markdown, lowercased and split into ``\\w+`` tokens.
+        2. Every page word that equals, contains, or is contained in the first
+           chunk token is a candidate start position.
+        3. From each candidate, page words are walked in order and compared to
+           the chunk tokens with the same equal/contains test; up to 3
+           consecutive non-matching page words are tolerated.
+        4. The first candidate that matches at least 50% of the chunk tokens wins.
+        5. The match is rejected if more than 30% of the matched words are
+           separated by a vertical gap above 50 points.
+        6. Matched word boxes on the same line are merged and added to the page
+           as highlight annotations.
+
+        Args:
+            page: PyMuPDF page object (modified in place: annotations are added)
+            chunk_text: Text to highlight (may contain markdown)
+            color: Color name from COLORS dict (unknown names fall back to yellow)
+            search_region: Optional (x0, y0, x1, y1) bounding box to constrain search.
+                          If provided, only words within this region are considered.
+
+        Returns:
+            Number of highlight rectangles added (0 if nothing could be matched)
+        """
+        # Tokenize chunk into lowercase word tokens (\w+: letters, digits, underscore)
+        chunk_words = re.findall(
+            r"\w+", PDFHighlighter.strip_markdown(chunk_text).lower()
+        )
+
+        if not chunk_words:
+            logger.warning("No words found in chunk text")
+            return 0
+
+        # Get all words from page with positions
+        # Format: (x0, y0, x1, y1, "word", block_no, line_no, word_no)
+        try:
+            page_words = page.get_text("words")
+        except Exception as e:
+            logger.error(f"Failed to extract words from page: {e}")
+            return 0
+
+        if not page_words:
+            logger.warning("No words found on page")
+            return 0
+
+        # Filter words by search region if provided
+        if search_region:
+            rx0, ry0, rx1, ry1 = search_region
+            # Allow some tolerance (10 points) for words near region boundary
+            tolerance = 10
+            # Keep only words whose whole box lies inside the widened region
+            page_words = [
+                w
+                for w in page_words
+                if (
+                    w[0] >= rx0 - tolerance
+                    and w[2] <= rx1 + tolerance
+                    and w[1] >= ry0 - tolerance
+                    and w[3] <= ry1 + tolerance
+                )
+            ]
+            logger.debug(
+                f"Filtered to {len(page_words)} words in region "
+                f"({rx0:.0f}, {ry0:.0f}, {rx1:.0f}, {ry1:.0f})"
+            )
+
+        if not page_words:
+            logger.warning("No words found in search region")
+            return 0
+
+        # Find matching word sequence - use FIRST match, not longest
+        # This ensures we highlight the actual chunk location, not similar text elsewhere
+        matches = []
+
+        # Collect candidate starting positions: every page word that loosely
+        # matches the first chunk word
+        first_chunk_word = chunk_words[0] if chunk_words else ""
+        candidate_starts = []
+
+        for i, pw in enumerate(page_words):
+            page_word = pw[4].lower()
+            # Check if this could be the start of the chunk
+            # (exact match, or either word is a substring of the other)
+            if (
+                first_chunk_word == page_word
+                or first_chunk_word in page_word
+                or page_word in first_chunk_word
+            ):
+                candidate_starts.append(i)
+
+        # Try each candidate start position and take the FIRST good match
+        for start_pos in candidate_starts:
+            current_matches = []
+            chunk_idx = 0  # Index of the next chunk word still to be matched
+            skip_count = 0  # Consecutive page words skipped since the last match
+            max_skips = 3  # Allow some formatting differences
+
+            for page_idx in range(start_pos, len(page_words)):
+                # Stop once every chunk word has been matched
+                if chunk_idx >= len(chunk_words):
+                    break
+
+                page_word = page_words[page_idx][4].lower()
+                chunk_word = chunk_words[chunk_idx]
+
+                # Check for match (allow partial matches for flexibility)
+                if (
+                    chunk_word == page_word
+                    or chunk_word in page_word
+                    or page_word in chunk_word
+                ):
+                    current_matches.append(page_words[page_idx])
+                    chunk_idx += 1
+                    skip_count = 0
+                elif skip_count < max_skips:
+                    # Allow skipping some page words (formatting, punctuation);
+                    # the chunk word is retried against the next page word
+                    skip_count += 1
+                    continue
+                else:
+                    # Too many consecutive misses: abandon this candidate
+                    break
+
+            # Accept if we matched at least 50% of chunk words
+            if len(current_matches) >= len(chunk_words) * 0.5:
+                matches = current_matches
+                logger.debug(
+                    f"Found match at position {start_pos}: "
+                    f"{len(matches)}/{len(chunk_words)} words"
+                )
+                break  # Take FIRST match, not best/longest
+
+        if not matches:
+            logger.debug(f"No word matches found (chunk has {len(chunk_words)} words)")
+            return 0
+
+        logger.debug(
+            f"Matched {len(matches)} words out of {len(chunk_words)} chunk words"
+        )
+
+        # Build rectangles from matched words
+        rects = [pymupdf.Rect(w[0], w[1], w[2], w[3]) for w in matches]
+
+        # Check if matches are contiguous (not scattered across the page)
+        # Scattered matches indicate false positives from common words
+        if len(rects) > 1:
+            # Sort by vertical position then horizontal
+            sorted_matches = sorted(matches, key=lambda w: (round(w[1]), w[0]))
+
+            # Check for large vertical gaps between consecutive matched words
+            # A typical line height is 12-20 points
+            max_line_gap = 50  # Points - allows for ~2-3 lines gap
+            prev_y = sorted_matches[0][1]
+            large_gaps = 0
+
+            for match in sorted_matches[1:]:
+                y_gap = match[1] - prev_y
+                if y_gap > max_line_gap:
+                    large_gaps += 1
+                prev_y = match[1]
+
+            # If matches are scattered (many large gaps), reject this match
+            # A chunk should be mostly contiguous text
+            if large_gaps > len(matches) * 0.3:  # More than 30% have gaps
+                logger.debug(
+                    f"Rejecting scattered matches: {large_gaps} large gaps "
+                    f"out of {len(matches)} matches"
+                )
+                return 0
+
+        # Merge adjacent rectangles on the same line for cleaner highlighting
+        merged_rects = []
+        sorted_rects = sorted(rects, key=lambda r: (round(r.y0), r.x0))
+
+        current_rect = None
+        for rect in sorted_rects:
+            if current_rect is None:
+                current_rect = rect
+            elif abs(rect.y0 - current_rect.y0) < 5:  # Same line (within 5 points)
+                current_rect = current_rect | rect  # Union
+            else:
+                # Line changed: flush the merged box and start a new one
+                merged_rects.append(current_rect)
+                current_rect = rect
+
+        # Flush the last line's merged box
+        if current_rect:
+            merged_rects.append(current_rect)
+
+        # Add highlights
+        rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
+        for rect in merged_rects:
+            highlight = page.add_highlight_annot(rect)
+            highlight.set_colors({"stroke": rgb})
+            highlight.set_info(
+                content="Chunk from semantic search",
+                title="PDF Highlighter (word-position)",
+            )
+            highlight.update()
+
+        return len(merged_rects)
+
+    @staticmethod
+    def find_unique_phrase(
+        text: str, min_len: int = 30, max_len: int = 80
+    ) -> str | None:
+        """Pick a short phrase from text to use as a location search key.
+
+        The text is stripped of markdown and split into sentences. Sentences
+        are examined in order; any shorter than min_len are skipped. The first
+        remaining sentence is returned, cut to max_len characters if it is
+        longer than that. If every sentence is too short, the start of the
+        cleaned text is used instead.
+
+        Args:
+            text: Source text to extract phrase from
+            min_len: Minimum phrase length
+            max_len: Maximum phrase length
+
+        Returns:
+            The selected phrase, or None if the text is empty after cleaning
+        """
+        cleaned = PDFHighlighter.strip_markdown(text).strip()
+        if not cleaned:
+            return None
+
+        # First sentence that is long enough wins (truncated when too long)
+        for raw_sentence in re.split(r"[.!?]\s+", cleaned):
+            candidate = raw_sentence.strip()
+            if len(candidate) > max_len:
+                return candidate[:max_len]
+            if len(candidate) >= min_len:
+                return candidate
+
+        # No usable sentence: fall back to the head of the cleaned text
+        if len(cleaned) >= min_len:
+            return cleaned[:max_len]
+
+        # Text shorter than min_len is returned whole
+        return cleaned
+
+    @staticmethod
+    def _find_chunk_bbox(
+        page: pymupdf.Page,
+        chunk_text: str,
+        page_relative_start: int,
+        page_relative_end: int,
+        page_text_length: int,
+    ) -> tuple[float, float, float, float] | None:
+        """Find bounding box for a chunk without modifying the page.
+
+        Locates an anchor by text-searching the page for phrases taken from the
+        start of the chunk, then estimates the box height from the chunk's
+        character count. The box spans the page width minus a 30pt margin on
+        each side.
+
+        Args:
+            page: PyMuPDF page object (only read, never modified)
+            chunk_text: Chunk text to locate (may contain markdown)
+            page_relative_start: Currently unused; kept for interface compatibility
+            page_relative_end: Currently unused; kept for interface compatibility
+            page_text_length: Currently unused; kept for interface compatibility
+
+        Returns:
+            (x0, y0, x1, y1) in page coordinates, or None if no search phrase
+            was found on the page.
+        """
+        page_rect = page.rect
+
+        # Strip markdown for searching
+        search_text = PDFHighlighter.strip_markdown(chunk_text)
+
+        # Try to find chunk location using text search
+        anchor_rect = None
+        search_phrases = []
+
+        # Build search phrases from the first 3 sentences: an 80-char prefix,
+        # plus a 40-char prefix as a shorter fallback for longer sentences
+        sentences = re.split(r"[.!?]\s+", search_text)
+        for sentence in sentences[:3]:
+            sentence = sentence.strip()
+            if len(sentence) >= 20:
+                search_phrases.append(sentence[:80])
+                if len(sentence) >= 40:
+                    search_phrases.append(sentence[:40])
+
+        # Also try first N characters
+        if len(search_text) >= 30:
+            search_phrases.append(search_text[:60])
+            search_phrases.append(search_text[:30])
+
+        # First phrase that is found anywhere on the page becomes the anchor
+        for phrase in search_phrases:
+            if not phrase:
+                continue
+            rects = page.search_for(phrase.strip())
+            if rects:
+                anchor_rect = rects[0]
+                break
+
+        if not anchor_rect:
+            return None
+
+        # Calculate chunk height based on character count
+        # (~60 characters per line, ~14pt per line)
+        chunk_chars = len(search_text)
+        estimated_lines = max(1, chunk_chars / 60)
+        estimated_height = estimated_lines * 14
+
+        # Estimated bottom edge, kept inside the bottom page margin...
+        bottom = min(anchor_rect.y0 + estimated_height + 10, page_rect.y1 - 30)
+        # ...but never above the anchor itself: when the anchor lies inside the
+        # bottom margin the margin cap would otherwise produce an inverted box
+        # (bottom above top) that does not cover the matched text
+        bottom = max(bottom, anchor_rect.y1)
+
+        # Build bounding box
+        return (
+            page_rect.x0 + 30,  # Left margin
+            anchor_rect.y0 - 5,  # Start slightly above anchor
+            page_rect.x1 - 30,  # Right margin
+            bottom,
+        )
+
+    @staticmethod
+    def highlight_chunk_on_page(
+        page: pymupdf.Page,
+        chunk_text: str,
+        color: str = "yellow",
+        page_relative_start: int | None = None,
+        page_relative_end: int | None = None,
+        page_text_length: int | None = None,
+    ) -> int:
+        """Add bounding box highlight to a PDF page for the given chunk text.
+
+        Uses text search to find the chunk's location on the page, then draws
+        a dashed bounding box (plus a faint fill) around the estimated region.
+        There is no fallback: if none of the search phrases is found on the
+        page, nothing is drawn and 0 is returned.
+
+        Args:
+            page: PyMuPDF page object (modified in place)
+            chunk_text: Text to highlight (may contain markdown)
+            color: Color name from COLORS dict for the border (unknown names
+                fall back to yellow); the fill is always light yellow
+            page_relative_start: Currently unused; kept for interface compatibility
+            page_relative_end: Currently unused; kept for interface compatibility
+            page_text_length: Currently unused; kept for interface compatibility
+
+        Returns:
+            Number of highlights added (1 for bounding box, 0 if failed)
+        """
+        page_rect = page.rect
+        rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
+
+        # Strip markdown for searching
+        search_text = PDFHighlighter.strip_markdown(chunk_text)
+
+        # Try to find chunk location using text search
+        # Search for progressively shorter phrases until we find a match
+        anchor_rect = None
+        search_phrases = []
+
+        # Build search phrases from chunk text
+        sentences = re.split(r"[.!?]\s+", search_text)
+        for sentence in sentences[:3]:  # Try first 3 sentences
+            sentence = sentence.strip()
+            if len(sentence) >= 20:
+                search_phrases.append(sentence[:80])
+                if len(sentence) >= 40:
+                    search_phrases.append(sentence[:40])
+
+        # Also try first N characters
+        if len(search_text) >= 30:
+            search_phrases.append(search_text[:60])
+            search_phrases.append(search_text[:30])
+
+        for phrase in search_phrases:
+            if not phrase:
+                continue
+            rects = page.search_for(phrase.strip())
+            if rects:
+                anchor_rect = rects[0]  # Use first match
+                logger.debug(f"Found chunk anchor using phrase: '{phrase[:30]}...'")
+                break
+
+        if not anchor_rect:
+            page_num = page.number + 1 if page.number is not None else "unknown"
+            logger.warning(f"Could not find chunk text on page {page_num}")
+            return 0
+
+        # Calculate chunk height based on character count
+        chunk_chars = len(search_text)
+        estimated_lines = max(1, chunk_chars / 60)  # ~60 chars per line typical
+        estimated_height = estimated_lines * 14  # ~14pt per line
+
+        # Estimated bottom edge, kept inside the bottom page margin...
+        bottom = min(anchor_rect.y0 + estimated_height + 10, page_rect.y1 - 30)
+        # ...but never above the anchor itself: when the anchor lies inside the
+        # bottom margin the margin cap would otherwise produce an inverted box
+        # (bottom above top) that does not cover the matched text
+        bottom = max(bottom, anchor_rect.y1)
+
+        # Build bounding box starting from anchor
+        chunk_rect = pymupdf.Rect(
+            page_rect.x0 + 30,  # Left margin
+            anchor_rect.y0 - 5,  # Start slightly above anchor
+            page_rect.x1 - 30,  # Right margin
+            bottom,  # Estimated bottom
+        )
+
+        # Draw a visible rectangle around the chunk region
+        shape = page.new_shape()
+        shape.draw_rect(chunk_rect)
+        shape.finish(
+            color=rgb,  # Border color
+            fill=None,  # No fill (transparent)
+            width=2.5,  # Border width
+            dashes="[4 2]",  # Dashed line
+        )
+        shape.commit()
+
+        # Add semi-transparent fill for visibility
+        fill_shape = page.new_shape()
+        fill_shape.draw_rect(chunk_rect)
+        fill_shape.finish(
+            color=None,  # No border
+            fill=[1, 1, 0.7],  # Light yellow fill
+            fill_opacity=0.15,  # Very transparent
+        )
+        fill_shape.commit()
+
+        logger.debug(
+            f"Added bounding box at y={chunk_rect.y0:.0f}-{chunk_rect.y1:.0f} "
+            f"(estimated {estimated_lines:.1f} lines)"
+        )
+
+        return 1
+
+    @staticmethod
+    def highlight_chunk(
+        pdf_bytes: bytes,
+        chunk_start_offset: int,
+        chunk_end_offset: int,
+        stored_page_number: Optional[int] = None,
+        color: str = "yellow",
+        zoom: float = 2.0,
+    ) -> Optional[tuple[bytes, int, int]]:
+        """Generate PNG image of PDF page with highlighted chunk.
+
+        This is the main entry point for highlighting. It:
+        1. Extracts document text with page boundaries
+        2. Finds which page contains the chunk
+        3. Extracts chunk text using character offsets
+        4. Highlights the chunk on the page
+        5. Renders page to PNG
+
+        The PDF is written to a temporary directory for the duration of the
+        call. The document handle is always closed and the directory removed
+        before returning, on success and on every failure path.
+
+        Args:
+            pdf_bytes: PDF file bytes
+            chunk_start_offset: Chunk start position (document-level)
+            chunk_end_offset: Chunk end position (document-level)
+            stored_page_number: Page number from metadata (optional; only used
+                to log a mismatch with the computed page)
+            color: Highlight color name
+            zoom: Rendering zoom factor (2.0 = 144 DPI)
+
+        Returns:
+            Tuple of (png_bytes, page_number, highlight_count) or None if failed.
+            Exceptions are logged and reported as None rather than raised.
+        """
+        import shutil
+        import tempfile
+        from pathlib import Path
+
+        temp_dir = None
+        doc = None
+        try:
+            # Write PDF to temp file with consistent name "pdf.pdf"
+            # This ensures image references match indexing (e.g., pdf-0001.png)
+            # Different temp filenames would cause different markdown text lengths!
+            temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_"))
+            temp_pdf_path = temp_dir / "pdf.pdf"
+            temp_pdf_path.write_bytes(pdf_bytes)
+
+            # Open PDF from temp file (closed in the finally block below)
+            doc = pymupdf.open(temp_pdf_path)
+
+            # Extract text with page boundaries
+            full_text, page_boundaries = (
+                PDFHighlighter.extract_pdf_text_with_boundaries(doc)
+            )
+
+            # Find which page contains the chunk
+            chunk_page_info = PDFHighlighter.find_chunk_page(
+                chunk_start_offset, chunk_end_offset, page_boundaries
+            )
+
+            if not chunk_page_info:
+                logger.error("Chunk not found on any page")
+                return None
+
+            page_num = chunk_page_info["page_num"]
+
+            # Log if page differs from stored metadata
+            if stored_page_number and stored_page_number != page_num:
+                logger.info(
+                    f"Chunk primarily on page {page_num}, metadata says {stored_page_number}"
+                )
+
+            # Extract page text
+            page_boundary = page_boundaries[page_num - 1]
+            page_start = page_boundary["start_offset"]
+            page_end = page_boundary["end_offset"]
+            page_text = full_text[page_start:page_end]
+
+            # Extract chunk text using page-relative offsets
+            page_relative_start = chunk_page_info["page_relative_start"]
+            page_relative_end = chunk_page_info["page_relative_end"]
+            chunk_text = page_text[page_relative_start:page_relative_end]
+
+            # Calculate page text length for region estimation
+            page_text_length = page_end - page_start
+
+            logger.debug(
+                f"Extracted {len(chunk_text)} chars on page {page_num} "
+                f"(offsets {page_relative_start}-{page_relative_end} of {page_text_length})"
+            )
+
+            # Get page and add highlights
+            page = doc[page_num - 1]
+            highlight_count = PDFHighlighter.highlight_chunk_on_page(
+                page,
+                chunk_text,
+                color,
+                page_relative_start=page_relative_start,
+                page_relative_end=page_relative_end,
+                page_text_length=page_text_length,
+            )
+
+            if highlight_count == 0:
+                logger.warning("No highlights added")
+                return None
+
+            # Render page to PNG
+            mat = pymupdf.Matrix(zoom, zoom)
+            pix = page.get_pixmap(matrix=mat, alpha=False)
+            png_bytes = pix.tobytes("png")
+
+            logger.info(
+                f"Generated {len(png_bytes):,} byte image with {highlight_count} highlights"
+            )
+
+            return (png_bytes, page_num, highlight_count)
+
+        except Exception as e:
+            logger.error(f"Error highlighting chunk: {e}", exc_info=True)
+            return None
+
+        finally:
+            # Close the document first so the file handle is released before
+            # the directory that contains the file is removed
+            if doc is not None:
+                try:
+                    doc.close()
+                except Exception as e:
+                    logger.warning(f"Failed to close PDF document: {e}")
+
+            # Clean up temp directory and PDF file
+            if temp_dir is not None and temp_dir.exists():
+                try:
+                    shutil.rmtree(temp_dir)
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to delete temp directory {temp_dir}: {e}"
+                    )
+
+    @staticmethod
+    def highlight_chunks_batch(
+        pdf_bytes: bytes,
+        chunks: list[tuple[int, int, int, int | None, str]],
+        page_boundaries: list[dict],
+        full_text: str,
+        color: str = "yellow",
+        zoom: float = 2.0,
+    ) -> dict[int, tuple[bytes, int, int]]:
+        """Generate highlighted images for multiple chunks.
+
+        Opens PDF once for rendering, uses pre-computed page boundaries from the
+        document processor. This ensures consistent character offsets between
+        chunking and highlighting.
+
+        Each page that holds at least one chunk is rendered once; every chunk
+        on that page then gets its own copy of the page image with a filled,
+        outlined box drawn over the chunk's estimated region.
+
+        The document handle is always closed and the temporary directory
+        removed before returning, including when an error interrupts the batch.
+
+        Args:
+            pdf_bytes: PDF file bytes
+            chunks: List of (chunk_index, start_offset, end_offset, stored_page_number, chunk_text)
+                    The chunk_index is used as the key in the returned dict.
+                    chunk_text is accepted for interface compatibility; the text
+                    used for locating the chunk is re-sliced from full_text so
+                    that only the portion on the target page is searched.
+            page_boundaries: Pre-computed page boundaries from document processor.
+                            Each entry: {"page": 1, "start_offset": 0, "end_offset": 1234}
+            full_text: Full document text for extracting page-relative portions.
+            color: Highlight color name (border only; the fill is light yellow)
+            zoom: Rendering zoom factor (2.0 = 144 DPI)
+
+        Returns:
+            Dict mapping chunk_index to (png_bytes, page_number, highlight_count)
+            Chunks that fail to highlight are omitted from the result. If an
+            unexpected error aborts the batch, the results gathered so far are
+            returned.
+        """
+        import shutil
+        import tempfile
+        from collections import defaultdict
+        from pathlib import Path
+
+        results: dict[int, tuple[bytes, int, int]] = {}
+
+        if not chunks:
+            return results
+
+        temp_dir = None
+        doc = None
+        try:
+            # Write PDF to temp file
+            temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_batch_"))
+            temp_pdf_path = temp_dir / "pdf.pdf"
+            temp_pdf_path.write_bytes(pdf_bytes)
+
+            # Open PDF once (only for rendering, not text extraction);
+            # closed in the finally block below
+            doc = pymupdf.open(temp_pdf_path)
+
+            logger.debug(
+                f"Batch highlighting: {len(chunks)} chunks, "
+                f"{len(page_boundaries)} pages"
+            )
+
+            # Group chunks by their target page for efficient rendering
+            # Entries: (chunk_index, chunk_page_info, page_relative_text, page_text_length)
+            chunks_by_page: dict[int, list[tuple[int, dict, str, int]]] = (
+                defaultdict(list)
+            )
+
+            for chunk_tuple in chunks:
+                # The caller-supplied chunk text is not used: the searchable
+                # text is re-sliced from full_text below
+                chunk_index, start_offset, end_offset, stored_page_num, _chunk_text = (
+                    chunk_tuple
+                )
+
+                # Find which page contains this chunk
+                chunk_page_info = PDFHighlighter.find_chunk_page(
+                    start_offset, end_offset, page_boundaries
+                )
+
+                if not chunk_page_info:
+                    logger.warning(f"Chunk {chunk_index}: not found on any page")
+                    continue
+
+                page_num = chunk_page_info["page_num"]
+
+                # Log if page differs from stored metadata
+                if stored_page_num and stored_page_num != page_num:
+                    logger.debug(
+                        f"Chunk {chunk_index}: found on page {page_num}, "
+                        f"metadata says {stored_page_num}"
+                    )
+
+                # Extract page-relative portion of chunk text
+                # This is critical for cross-page chunks where the start
+                # of the chunk might be on a different page
+                page_boundary = page_boundaries[page_num - 1]
+                page_start = page_boundary["start_offset"]
+                page_end = page_boundary["end_offset"]
+                page_text_length = page_end - page_start
+
+                # Calculate what portion of the chunk appears on this page
+                chunk_start_on_page = max(start_offset, page_start)
+                chunk_end_on_page = min(end_offset, page_end)
+
+                # Extract just the text that appears on this page
+                page_relative_text = full_text[chunk_start_on_page:chunk_end_on_page]
+
+                chunks_by_page[page_num].append(
+                    (chunk_index, chunk_page_info, page_relative_text, page_text_length)
+                )
+
+            logger.debug(
+                f"Chunks distributed across {len(chunks_by_page)} unique pages"
+            )
+
+            # OPTIMIZATION: Render each page ONCE, then draw highlights using PIL
+            # This avoids expensive page.get_pixmap() calls per chunk
+            from io import BytesIO
+
+            from PIL import Image, ImageDraw
+
+            # PIL color for bounding box (RGB tuple)
+            rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
+            pil_color = tuple(int(c * 255) for c in rgb)
+            fill_color = (255, 255, 178, 38)  # Light yellow with alpha
+
+            # The zoom matrix is the same for every page
+            mat = pymupdf.Matrix(zoom, zoom)
+
+            for page_num, page_chunks in chunks_by_page.items():
+                page = doc[page_num - 1]
+
+                # Render page ONCE to get base image (most expensive operation)
+                base_pix = page.get_pixmap(matrix=mat, alpha=False)
+                base_png = base_pix.tobytes("png")
+
+                # Convert to PIL Image for fast highlight drawing
+                base_image = Image.open(BytesIO(base_png)).convert("RGBA")
+                page_rect = page.rect
+
+                # Page-coordinate to pixel scale, identical for every chunk on the page
+                scale_x = base_pix.width / page_rect.width
+                scale_y = base_pix.height / page_rect.height
+
+                logger.debug(
+                    f"Page {page_num}: rendered once, processing {len(page_chunks)} chunks"
+                )
+
+                for (
+                    chunk_index,
+                    chunk_page_info,
+                    chunk_text,
+                    page_text_length,
+                ) in page_chunks:
+                    try:
+                        # Find chunk bounding box using text search
+                        bbox = PDFHighlighter._find_chunk_bbox(
+                            page,
+                            chunk_text,
+                            chunk_page_info["page_relative_start"],
+                            chunk_page_info["page_relative_end"],
+                            page_text_length,
+                        )
+
+                        if bbox is None:
+                            logger.warning(f"Chunk {chunk_index}: could not find bbox")
+                            continue
+
+                        # Copy base image for this chunk
+                        chunk_image = base_image.copy()
+
+                        # Scale bbox coordinates to pixmap coordinates
+                        pil_bbox = (
+                            int(bbox[0] * scale_x),
+                            int(bbox[1] * scale_y),
+                            int(bbox[2] * scale_x),
+                            int(bbox[3] * scale_y),
+                        )
+
+                        # Create transparent overlay for fill (proper alpha blending)
+                        overlay = Image.new("RGBA", chunk_image.size, (0, 0, 0, 0))
+                        overlay_draw = ImageDraw.Draw(overlay)
+                        overlay_draw.rectangle(pil_bbox, fill=fill_color)
+
+                        # Alpha composite the overlay onto the chunk image
+                        chunk_image = Image.alpha_composite(chunk_image, overlay)
+
+                        # Draw border on top (solid, not transparent)
+                        border_draw = ImageDraw.Draw(chunk_image)
+                        border_draw.rectangle(pil_bbox, outline=pil_color, width=3)
+
+                        # Convert back to PNG bytes
+                        output = BytesIO()
+                        chunk_image.convert("RGB").save(output, format="PNG")
+                        png_bytes = output.getvalue()
+
+                        results[chunk_index] = (png_bytes, page_num, 1)
+
+                        logger.debug(
+                            f"Chunk {chunk_index}: {len(png_bytes):,} bytes, "
+                            f"page {page_num}, bbox {pil_bbox}"
+                        )
+
+                    except Exception as e:
+                        # One bad chunk must not abort the rest of the batch
+                        logger.error(f"Chunk {chunk_index}: error - {e}")
+                        continue
+
+            logger.info(
+                f"Batch highlighted {len(results)}/{len(chunks)} chunks successfully"
+            )
+
+            return results
+
+        except Exception as e:
+            logger.error(f"Error in batch highlighting: {e}", exc_info=True)
+            return results
+
+        finally:
+            # Close the document first so the file handle is released before
+            # the directory that contains the file is removed
+            if doc is not None:
+                try:
+                    doc.close()
+                except Exception as e:
+                    logger.warning(f"Failed to close PDF document: {e}")
+
+            # Clean up temp directory
+            if temp_dir is not None and temp_dir.exists():
+                try:
+                    shutil.rmtree(temp_dir)
+                except Exception as e:
+                    logger.warning(f"Failed to clean up temp dir: {e}")
@@ -0,0 +1,184 @@
+"""Semantic search algorithm using vector similarity (Qdrant)."""
+
+import logging
+from typing import Any
+
+from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.embedding import get_embedding_service
+from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
+from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+class SemanticSearchAlgorithm(SearchAlgorithm):
+    """Semantic search using vector similarity in Qdrant.
+
+    Searches documents by meaning rather than exact keywords: the query is
+    embedded and matched against the collection's named ``dense`` vector.
+
+    Attributes:
+        score_threshold: Default minimum similarity score for returned points.
+        query_embedding: Embedding of the most recent query; None until search() runs.
+    """
+
+    def __init__(self, score_threshold: float = 0.7):
+        """Initialize semantic search algorithm.
+
+        Args:
+            score_threshold: Minimum similarity score (0-1, default: 0.7)
+        """
+        self.score_threshold = score_threshold
+        self.query_embedding = None
+
+    @property
+    def name(self) -> str:
+        """Identifier of this algorithm."""
+        return "semantic"
+
+    @property
+    def requires_vector_db(self) -> bool:
+        """Always True: every query goes through Qdrant."""
+        return True
+
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """Execute semantic search using vector similarity.
+
+        Returns unverified results from Qdrant. Access verification should be
+        performed separately at the final output stage using verify_search_results().
+        The query embedding is kept on ``self.query_embedding`` for reuse by callers.
+
+        Deduplicates by (doc_id, doc_type, chunk_start_offset, chunk_end_offset)
+        to show multiple chunks from the same document while avoiding duplicate chunks.
+
+        Args:
+            query: Natural language search query
+            user_id: User ID for filtering
+            limit: Maximum results to return
+            doc_type: Optional document type filter
+            **kwargs: Additional parameters (score_threshold override)
+
+        Returns:
+            List of unverified SearchResult objects ranked by similarity score
+
+        Raises:
+            Exception: Embedding or Qdrant errors propagate unchanged to the caller
+        """
+        settings = get_settings()
+        score_threshold = kwargs.get("score_threshold", self.score_threshold)
+
+        logger.info(
+            f"Semantic search: query='{query}', user={user_id}, "
+            f"limit={limit}, score_threshold={score_threshold}, doc_type={doc_type}"
+        )
+
+        # Generate embedding for query
+        embedding_service = get_embedding_service()
+        query_embedding = await embedding_service.embed(query)
+        # Store for reuse by callers (e.g., viz_routes PCA visualization)
+        self.query_embedding = query_embedding
+        logger.debug(
+            f"Generated embedding for query (dimension={len(query_embedding)})"
+        )
+
+        # Build Qdrant filter
+        filter_conditions = [
+            get_placeholder_filter(),  # Always exclude placeholders from user-facing queries
+            FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+        ]
+
+        # Add doc_type filter if specified
+        if doc_type:
+            filter_conditions.append(
+                FieldCondition(key="doc_type", match=MatchValue(value=doc_type))
+            )
+
+        # Search Qdrant
+        qdrant_client = await get_qdrant_client()
+        try:
+            search_response = await qdrant_client.query_points(
+                collection_name=settings.get_collection_name(),
+                query=query_embedding,
+                using="dense",  # Use named dense vector (BM25 hybrid collections)
+                query_filter=Filter(must=filter_conditions),
+                limit=limit * 2,  # Get extra for deduplication
+                score_threshold=score_threshold,
+                with_payload=True,
+                with_vectors=False,  # Don't return vectors to save bandwidth
+            )
+            record_qdrant_operation("search", "success")
+        except Exception:
+            record_qdrant_operation("search", "error")
+            raise
+
+        points = search_response.points
+        logger.info(f"Qdrant returned {len(points)} results (before deduplication)")
+
+        if points:
+            # Log top 3 scores to help with threshold tuning
+            top_scores = [p.score for p in points[:3]]
+            logger.debug(f"Top 3 similarity scores: {top_scores}")
+
+        # Deduplicate by (doc_id, doc_type, chunk_start, chunk_end)
+        # This allows multiple chunks from same doc, but removes duplicate chunks
+        seen_chunks = set()
+        results = []
+
+        for result in points:
+            # doc_id can be int (notes) or str (files - file paths)
+            doc_id = result.payload["doc_id"]
+            point_doc_type = result.payload.get("doc_type", "note")  # keep arg intact
+            chunk_start = result.payload.get("chunk_start_offset")
+            chunk_end = result.payload.get("chunk_end_offset")
+            chunk_key = (doc_id, point_doc_type, chunk_start, chunk_end)
+
+            # Skip if we've already seen this exact chunk
+            if chunk_key in seen_chunks:
+                continue
+
+            seen_chunks.add(chunk_key)
+
+            # Return unverified results (verification happens at output stage)
+            results.append(
+                SearchResult(
+                    id=doc_id,
+                    doc_type=point_doc_type,
+                    title=result.payload.get("title", "Untitled"),
+                    excerpt=result.payload.get("excerpt", ""),
+                    score=result.score,
+                    metadata={
+                        "chunk_index": result.payload.get("chunk_index"),
+                        "total_chunks": result.payload.get("total_chunks"),
+                    },
+                    chunk_start_offset=chunk_start,
+                    chunk_end_offset=chunk_end,
+                    page_number=result.payload.get("page_number"),
+                    chunk_index=result.payload.get("chunk_index", 0),
+                    total_chunks=result.payload.get("total_chunks", 1),
+                    point_id=str(result.id),  # Qdrant point ID for batch retrieval
+                )
+            )
+
+            if len(results) >= limit:
+                break
+
+        logger.info(f"Returning {len(results)} unverified results after deduplication")
+        if results:
+            result_details = [
+                f"{r.doc_type}_{r.id} (score={r.score:.3f}, title='{r.title}')"
+                for r in results[:5]  # Show top 5
+            ]
+            logger.debug(f"Top results: {', '.join(result_details)}")
+
+        return results
@@ -2,7 +2,8 @@

 import logging

-from httpx import HTTPStatusError, RequestError
+import anyio
+from httpx import RequestError
 from mcp.server.fastmcp import Context, FastMCP
 from mcp.shared.exceptions import McpError
 from mcp.types import (
@@ -23,8 +24,9 @@ from nextcloud_mcp_server.models.semantic import (
 )
 from nextcloud_mcp_server.observability.metrics import (
    instrument_tool,
-    record_qdrant_operation,
 )
+from nextcloud_mcp_server.search.bm25_hybrid import BM25HybridSearchAlgorithm
+from nextcloud_mcp_server.search.context import get_chunk_with_context

 logger = logging.getLogger(__name__)

@@ -36,187 +38,252 @@ def configure_semantic_tools(mcp: FastMCP):
    @require_scopes("semantic:read")
    @instrument_tool
    async def nc_semantic_search(
-        query: str, ctx: Context, limit: int = 10, score_threshold: float = 0.7
+        query: str,
+        ctx: Context,
+        limit: int = 10,
+        doc_types: list[str] | None = None,
+        score_threshold: float = 0.0,
+        fusion: str = "rrf",
+        include_context: bool = False,
+        context_chars: int = 300,
    ) -> SemanticSearchResponse:
        """
-        Semantic search across all indexed Nextcloud apps using vector embeddings.
+        Search Nextcloud content using BM25 hybrid search with cross-app support.

-        Searches documents by meaning rather than exact keywords across notes, calendar
-        events, deck cards, files, and contacts. Requires vector database synchronization
-        to be enabled (VECTOR_SYNC_ENABLED=true).
+        Uses Qdrant's native hybrid search combining:
+        - Dense semantic vectors: For conceptual similarity and natural language queries
+        - BM25 sparse vectors: For precise keyword matching, acronyms, and specific terms
+
+        Results are automatically fused using the selected fusion algorithm in the
+        database for optimal relevance. This provides the best of both semantic
+        understanding and keyword precision.
+
+        Requires VECTOR_SYNC_ENABLED=true. Currently only "note" documents are
+        fully supported for indexing.

        Args:
-            query: Natural language search query
+            query: Natural language or keyword search query
            limit: Maximum number of results to return (default: 10)
-            score_threshold: Minimum similarity score (0-1, default: 0.7)
+            doc_types: Document types to search (e.g., ["note", "file"]). None = search all indexed types (default)
+            score_threshold: Minimum fusion score (0-1, default: 0.0)
+            fusion: Fusion algorithm: "rrf" (Reciprocal Rank Fusion, default) or "dbsf" (Distribution-Based Score Fusion)
+                   RRF: Good general-purpose fusion using reciprocal ranks
+                   DBSF: Uses distribution-based normalization, may better balance different score ranges
+            include_context: Whether to expand results with surrounding context (default: False)
+            context_chars: Number of characters to include before/after matched chunk (default: 300)

        Returns:
-            SemanticSearchResponse with matching documents and similarity scores
+            SemanticSearchResponse with matching documents ranked by fusion scores
        """
-        from qdrant_client.models import FieldCondition, Filter, MatchValue
-
        from nextcloud_mcp_server.config import get_settings
-        from nextcloud_mcp_server.embedding import get_embedding_service
-        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

        settings = get_settings()
-
-        # Check if vector sync is enabled
-        if not settings.vector_sync_enabled:
-            raise McpError(
-                ErrorData(
-                    code=-1,
-                    message="Semantic search is not enabled. Set VECTOR_SYNC_ENABLED=true and ensure vector database is configured.",
-                )
-            )
-
        client = await get_client(ctx)
        username = client.username

        logger.info(
-            f"Semantic search: query='{query}', user={username}, "
-            f"limit={limit}, score_threshold={score_threshold}"
+            f"BM25 hybrid search: query='{query}', user={username}, "
+            f"limit={limit}, score_threshold={score_threshold}, fusion={fusion}"
        )

-        try:
-            # Generate embedding for query
-            embedding_service = get_embedding_service()
-            query_embedding = await embedding_service.embed(query)
-            logger.debug(
-                f"Generated embedding for query (dimension={len(query_embedding)})"
-            )
-
-            # Search Qdrant with user filtering
-            # Note: Currently only searching notes (doc_type="note")
-            # Future: Remove doc_type filter to search all apps
-            qdrant_client = await get_qdrant_client()
-            try:
-                search_response = await qdrant_client.query_points(
-                    collection_name=settings.get_collection_name(),
-                    query=query_embedding,
-                    query_filter=Filter(
-                        must=[
-                            FieldCondition(
-                                key="user_id",
-                                match=MatchValue(value=username),
-                            ),
-                            FieldCondition(
-                                key="doc_type",
-                                match=MatchValue(value="note"),
-                            ),
-                        ]
-                    ),
-                    limit=limit * 2,  # Get extra for filtering
-                    score_threshold=score_threshold,
-                    with_payload=True,
-                    with_vectors=False,  # Don't return vectors to save bandwidth
+        # Check that vector sync is enabled
+        if not settings.vector_sync_enabled:
+            raise McpError(
+                ErrorData(
+                    code=-1,
+                    message="BM25 hybrid search requires VECTOR_SYNC_ENABLED=true",
                )
-                # Record successful search operation
-                record_qdrant_operation("search", "success")
-            except Exception:
-                # Record failed search operation
-                record_qdrant_operation("search", "error")
-                raise
-
-            logger.info(
-                f"Qdrant returned {len(search_response.points)} results "
-                f"(before deduplication and access verification)"
            )
-            if search_response.points:
-                # Log top 3 scores to help with threshold tuning
-                top_scores = [p.score for p in search_response.points[:3]]
-                logger.debug(f"Top 3 similarity scores: {top_scores}")

-            # Deduplicate by document ID (multiple chunks per document)
-            seen_doc_ids = set()
+        try:
+            # Create BM25 hybrid search algorithm with specified fusion
+            search_algo = BM25HybridSearchAlgorithm(
+                score_threshold=score_threshold, fusion=fusion
+            )
+
+            # Execute search across requested document types
+            # If doc_types is None, search all indexed types (cross-app search)
+            # If doc_types is a list, search only those types
+            all_results = []
+
+            if doc_types is None:
+                # Cross-app search: search all indexed types
+                # Get unverified results from Qdrant
+                unverified_results = await search_algo.search(
+                    query=query,
+                    user_id=username,
+                    limit=limit * 2,  # Get extra for access filtering
+                    doc_type=None,  # Signal to search all types
+                    score_threshold=score_threshold,
+                )
+                all_results.extend(unverified_results)
+            else:
+                # Search specific document types
+                # For each requested type, execute search and combine results
+                for dtype in doc_types:
+                    unverified_results = await search_algo.search(
+                        query=query,
+                        user_id=username,
+                        limit=limit * 2,  # Get extra for combining and filtering
+                        doc_type=dtype,
+                        score_threshold=score_threshold,
+                    )
+                    all_results.extend(unverified_results)
+
+                # Sort combined results by score
+                all_results.sort(key=lambda r: r.score, reverse=True)
+
+            # Note: BM25HybridSearchAlgorithm already deduplicates at chunk level
+            # (doc_id, doc_type, chunk_start, chunk_end), which allows multiple
+            # chunks from the same document while preventing duplicate chunks.
+            # No additional deduplication needed here - multiple chunks per document
+            # are valuable for RAG contexts.
+            # Qdrant already filters by user_id for multi-tenant isolation.
+            # Sampling tool will verify access when fetching full content.
+            search_results = all_results[
+                :limit
+            ]  # Final limit after chunk-level dedup in algorithm
+
+            # Convert SearchResult objects to SemanticSearchResult for response
            results = []
+            for r in search_results:
+                results.append(
+                    SemanticSearchResult(
+                        id=r.id,
+                        doc_type=r.doc_type,
+                        title=r.title,
+                        category=r.metadata.get("category", "") if r.metadata else "",
+                        excerpt=r.excerpt,
+                        score=r.score,
+                        chunk_index=r.metadata.get("chunk_index", 0)
+                        if r.metadata
+                        else 0,
+                        total_chunks=r.metadata.get("total_chunks", 1)
+                        if r.metadata
+                        else 1,
+                        chunk_start_offset=r.chunk_start_offset,
+                        chunk_end_offset=r.chunk_end_offset,
+                        page_number=r.page_number,
+                    )
+                )

-            for result in search_response.points:
-                doc_id = int(result.payload["doc_id"])
-                doc_type = result.payload.get("doc_type", "note")
+            # Expand results with surrounding context if requested
+            if include_context and results:
+                logger.info(
+                    f"Expanding {len(results)} results with context "
+                    f"(context_chars={context_chars})"
+                )

-                # Skip if we've already seen this document
-                if doc_id in seen_doc_ids:
-                    continue
+                # Fetch context for all results in parallel
+                # Limit concurrent requests to prevent connection pool exhaustion
+                max_concurrent = 20
+                semaphore = anyio.Semaphore(max_concurrent)
+                expanded_results = [None] * len(results)

-                seen_doc_ids.add(doc_id)
+                async def fetch_context(index: int, result: SemanticSearchResult):
+                    """Fetch context for a single result (parallel with semaphore)."""
+                    async with semaphore:
+                        # Only expand if we have valid chunk offsets
+                        if (
+                            result.chunk_start_offset is None
+                            or result.chunk_end_offset is None
+                        ):
+                            # Keep result as-is without context expansion
+                            expanded_results[index] = result
+                            return

-                # Verify access via Nextcloud API (dual-phase authorization)
-                # Currently only supports notes, will be extended to other apps
-                if doc_type == "note":
-                    try:
-                        note = await client.notes.get_note(doc_id)
-
-                        results.append(
-                            SemanticSearchResult(
-                                id=doc_id,
-                                doc_type="note",
-                                title=result.payload["title"],
-                                category=note.get("category", ""),
-                                excerpt=result.payload["excerpt"],
-                                score=result.score,
-                                chunk_index=result.payload["chunk_index"],
-                                total_chunks=result.payload["total_chunks"],
+                        try:
+                            chunk_context = await get_chunk_with_context(
+                                nc_client=client,
+                                user_id=username,
+                                doc_id=result.id,
+                                doc_type=result.doc_type,
+                                chunk_start=result.chunk_start_offset,
+                                chunk_end=result.chunk_end_offset,
+                                page_number=result.page_number,
+                                chunk_index=result.chunk_index,
+                                total_chunks=result.total_chunks,
+                                context_chars=context_chars,
                            )
-                        )

-                        if len(results) >= limit:
-                            break
-
-                    except HTTPStatusError as e:
-                        if e.response.status_code == 403:
-                            # User lost access, skip this document
-                            logger.debug(f"Skipping note {doc_id}: access denied (403)")
-                            continue
-                        elif e.response.status_code == 404:
-                            # Document was deleted but not yet removed from vector DB
-                            logger.debug(
-                                f"Skipping note {doc_id}: not found (404), "
-                                f"likely deleted after indexing"
-                            )
-                            continue
-                        else:
-                            # Log other errors but continue processing
+                            if chunk_context:
+                                # Create new result with context fields populated
+                                expanded_results[index] = SemanticSearchResult(
+                                    id=result.id,
+                                    doc_type=result.doc_type,
+                                    title=result.title,
+                                    category=result.category,
+                                    excerpt=result.excerpt,
+                                    score=result.score,
+                                    chunk_index=result.chunk_index,
+                                    total_chunks=result.total_chunks,
+                                    chunk_start_offset=result.chunk_start_offset,
+                                    chunk_end_offset=result.chunk_end_offset,
+                                    page_number=result.page_number,
+                                    # Context expansion fields
+                                    has_context_expansion=True,
+                                    marked_text=chunk_context.marked_text,
+                                    before_context=chunk_context.before_context,
+                                    after_context=chunk_context.after_context,
+                                    has_before_truncation=chunk_context.has_before_truncation,
+                                    has_after_truncation=chunk_context.has_after_truncation,
+                                )
+                                logger.debug(
+                                    f"Expanded context for {result.doc_type} {result.id}"
+                                )
+                            else:
+                                # Context expansion failed, keep original result
+                                expanded_results[index] = result
+                                logger.debug(
+                                    f"Failed to expand context for {result.doc_type} {result.id}, "
+                                    "keeping original result"
+                                )
+                        except Exception as e:
+                            # Context expansion failed, keep original result
+                            expanded_results[index] = result
                            logger.warning(
-                                f"Error verifying access to note {doc_id}: {e.response.status_code}"
+                                f"Error expanding context for {result.doc_type} {result.id}: {e}"
                            )
-                            continue

-            logger.info(
-                f"Returning {len(results)} results after deduplication and access verification"
-            )
-            if results:
-                result_details = [
-                    f"note_{r.id} (score={r.score:.3f}, title='{r.title}')"
-                    for r in results[:5]  # Show top 5
-                ]
-                logger.debug(f"Top results: {', '.join(result_details)}")
+                # Run all context fetches in parallel using anyio task group
+                async with anyio.create_task_group() as tg:
+                    for idx, result in enumerate(results):
+                        tg.start_soon(fetch_context, idx, result)
+
+                # Replace results with expanded versions
+                results = [r for r in expanded_results if r is not None]
+                logger.info(
+                    f"Context expansion completed: {len(results)} results with context"
+                )
+
+            logger.info(f"Returning {len(results)} results from BM25 hybrid search")

            return SemanticSearchResponse(
                results=results,
                query=query,
                total_found=len(results),
-                search_method="semantic",
+                search_method=f"bm25_hybrid_{fusion}",
            )

        except ValueError as e:
-            if "No embedding provider configured" in str(e):
+            error_msg = str(e)
+            if "No embedding provider configured" in error_msg:
                raise McpError(
                    ErrorData(
                        code=-1,
                        message="Embedding service not configured. Set OLLAMA_BASE_URL environment variable.",
                    )
                )
-            raise McpError(ErrorData(code=-1, message=f"Configuration error: {str(e)}"))
+            raise McpError(
+                ErrorData(code=-1, message=f"Configuration error: {error_msg}")
+            )
        except RequestError as e:
            raise McpError(
                ErrorData(code=-1, message=f"Network error during search: {str(e)}")
            )
        except Exception as e:
-            logger.error(f"Semantic search error: {e}", exc_info=True)
-            raise McpError(
-                ErrorData(code=-1, message=f"Semantic search failed: {str(e)}")
-            )
+            logger.error(f"Search error: {e}", exc_info=True)
+            raise McpError(ErrorData(code=-1, message=f"Search failed: {str(e)}"))

    @mcp.tool()
    @require_scopes("semantic:read")
@@ -227,6 +294,9 @@ def configure_semantic_tools(mcp: FastMCP):
        limit: int = 5,
        score_threshold: float = 0.7,
        max_answer_tokens: int = 500,
+        fusion: str = "rrf",
+        include_context: bool = False,
+        context_chars: int = 300,
    ) -> SamplingSearchResponse:
        """
        Semantic search with LLM-generated answer using MCP sampling.
@@ -251,6 +321,9 @@ def configure_semantic_tools(mcp: FastMCP):
            limit: Maximum number of documents to retrieve (default: 5)
            score_threshold: Minimum similarity score 0-1 (default: 0.7)
            max_answer_tokens: Maximum tokens for generated answer (default: 500)
+            fusion: Fusion algorithm: "rrf" (Reciprocal Rank Fusion, default) or "dbsf" (Distribution-Based Score Fusion)
+            include_context: Whether to expand results with surrounding context (default: False)
+            context_chars: Number of characters to include before/after matched chunk (default: 300)

        Returns:
            SamplingSearchResponse containing:
@@ -262,27 +335,6 @@ def configure_semantic_tools(mcp: FastMCP):
        Note: Requires MCP client to support sampling. If sampling is unavailable,
        the tool gracefully degrades to returning documents with an explanation.
        The client may prompt the user to approve the sampling request.
-
-        Examples:
-            >>> # Query about objectives across multiple apps
-            >>> result = await nc_semantic_search_answer(
-            ...     query="What are my Q1 2025 project goals?",
-            ...     ctx=ctx
-            ... )
-            >>> print(result.generated_answer)
-            "Based on Document 1 (note: Project Kickoff), Document 2 (calendar event:
-            Q1 Planning Meeting), and Document 3 (deck card: Implement semantic search),
-            your main goals are: 1) Improve semantic search accuracy by 20%,
-            2) Deploy new embedding model, 3) Reduce indexing latency..."
-
-            >>> # Query about appointments
-            >>> result = await nc_semantic_search_answer(
-            ...     query="When is my next dentist appointment?",
-            ...     ctx=ctx,
-            ...     limit=10
-            ... )
-            >>> len(result.sources)  # Calendar events and related notes
-            3
        """
        # 1. Retrieve relevant documents via existing semantic search
        search_response = await nc_semantic_search(
@@ -290,6 +342,9 @@ def configure_semantic_tools(mcp: FastMCP):
            ctx=ctx,
            limit=limit,
            score_threshold=score_threshold,
+            fusion=fusion,
+            include_context=include_context,
+            context_chars=context_chars,
        )

        # 2. Handle no results case - don't waste a sampling call
@@ -344,35 +399,55 @@ def configure_semantic_tools(mcp: FastMCP):
                success=True,
            )

-        # 4. Fetch full content for notes to provide complete context to LLM
-        # Filter out inaccessible notes (deleted or permissions changed)
+        # 4. Fetch full content for notes in parallel (also verifies access)
+        # Use anyio task group for concurrent fetching with semaphore to prevent
+        # connection pool exhaustion
        client = await get_client(ctx)
-        accessible_results = []
-        full_contents = []  # Full content for accessible notes
+        accessible_results = [None] * len(search_response.results)
+        full_contents = [None] * len(search_response.results)

-        for result in search_response.results:
-            if result.doc_type == "note":
-                try:
-                    note = await client.notes.get_note(result.id)
-                    # Note is accessible, store full content
-                    accessible_results.append(result)
-                    full_contents.append(note.get("content", ""))
-                    logger.debug(
-                        f"Fetched full content for note {result.id} "
-                        f"(length: {len(full_contents[-1])} chars)"
-                    )
-                except Exception as e:
-                    # Note might have been deleted or permissions changed
-                    # Filter it out to avoid corrupting LLM with inaccessible data
-                    logger.warning(
-                        f"Failed to fetch full content for note {result.id}: {e}. "
-                        f"Excluding from results."
-                    )
-            else:
-                # Non-note document types (future: calendar, deck, files)
-                # For now, keep them with excerpts
-                accessible_results.append(result)
-                full_contents.append(None)
+        # Limit concurrent requests to prevent connection pool exhaustion
+        max_concurrent = 20
+        semaphore = anyio.Semaphore(max_concurrent)
+
+        async def fetch_content(index: int, result: SemanticSearchResult):
+            """Resolve one search hit into its slot of the shared result lists.
+
+            Runs under the semaphore; an exception while fetching a note is
+            logged at debug level and swallowed.
+            """
+            async with semaphore:
+                if result.doc_type != "note":
+                    # Other document types are kept as-is; their content slot stays None
+                    accessible_results[index] = result
+                    return
+                try:
+                    fetched = await client.notes.get_note(result.id)
+                    body = fetched.get("content", "")
+                    accessible_results[index] = result
+                    full_contents[index] = body
+                    logger.debug(
+                        f"Fetched full content for note {result.id} "
+                        f"(length: {len(body)} chars)"
+                    )
+                except Exception as e:
+                    # Deleted or permission-revoked notes are dropped by the later filter
+                    logger.debug(
+                        f"Note {result.id} not accessible: {e}. "
+                        f"Excluding from results."
+                    )
+
+        # Run all fetches in parallel using anyio task group
+        async with anyio.create_task_group() as tg:
+            for idx, result in enumerate(search_response.results):
+                tg.start_soon(fetch_content, idx, result)
+
+        # Filter out None (inaccessible notes) while preserving order
+        final_pairs = [
+            (r, c) for r, c in zip(accessible_results, full_contents) if r is not None
+        ]
+        accessible_results = [r for r, c in final_pairs]
+        full_contents = [c for r, c in final_pairs]

        # Check if we filtered out all results
        if not accessible_results:
@@ -424,7 +499,6 @@ def configure_semantic_tools(mcp: FastMCP):
        )

        # 6. Request LLM completion via MCP sampling with timeout
-        import anyio

        try:
            with anyio.fail_after(30):
@@ -64,20 +64,6 @@ def configure_webdav_tools(mcp: FastMCP):
            - Text files are decoded to UTF-8
            - Documents (PDF, DOCX, etc.) are parsed and text is extracted
            - Other binary files are base64 encoded
-
-        Examples:
-            # Read a text file
-            result = await nc_webdav_read_file("Documents/readme.txt")
-            logger.info(result['content'])  # Decoded text content
-
-            # Read a PDF document (automatically parsed)
-            result = await nc_webdav_read_file("Documents/report.pdf")
-            logger.info(result['content'])  # Extracted text from PDF
-            logger.info(result['parsing_metadata'])  # Document parsing info
-
-            # Read a binary file
-            result = await nc_webdav_read_file("Images/photo.jpg")
-            logger.info(result['encoding'])  # 'base64'
        """
        client = await get_client(ctx)
        content, content_type = await client.webdav.read_file(path)
@@ -0,0 +1,60 @@
+"""Smithery-specific entrypoint for stateless deployment.
+
+ADR-016: This entrypoint is used when deploying on Smithery's hosting platform.
+It configures the server for stateless operation with per-session authentication.
+
+Features disabled in Smithery mode:
+- Vector sync / semantic search (no persistent storage)
+- Admin UI at /app (no webhooks, no vector viz)
+- OAuth provisioning tools (no token storage)
+
+Features enabled:
+- Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
+- Per-session app password authentication via Smithery configSchema
+- Health check endpoints (/health/live, /health/ready)
+"""
+
+import logging
+import os
+
+import uvicorn
+
+from nextcloud_mcp_server.config import setup_logging
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    """Start the MCP server in Smithery stateless mode."""
+    # Setup logging first
+    setup_logging()
+
+    # Force stateless mode environment variables
+    os.environ["SMITHERY_DEPLOYMENT"] = "true"
+    os.environ["VECTOR_SYNC_ENABLED"] = "false"
+
+    logger.info("Starting Nextcloud MCP Server in Smithery stateless mode")
+
+    # Import app after setting environment variables
+    from nextcloud_mcp_server.app import get_app
+
+    # Create the app with streamable-http transport (required for Smithery)
+    app = get_app(transport="streamable-http")
+
+    # Smithery sets PORT environment variable
+    port = int(os.environ.get("PORT", 8081))
+
+    logger.info(f"Listening on port {port}")
+
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=port,
+        log_level="info",
+        # Disable access log for cleaner output
+        access_log=False,
+    )
+
+
+if __name__ == "__main__":
+    main()
@@ -1,51 +1,97 @@
-"""Document chunking for large texts."""
+"""Document chunking for large texts using LangChain text splitters."""

 import logging
+from dataclasses import dataclass
+
+from langchain_text_splitters import RecursiveCharacterTextSplitter

 logger = logging.getLogger(__name__)


-class DocumentChunker:
-    """Chunk large documents for optimal embedding."""
+@dataclass
+class ChunkWithPosition:
+    """A text chunk with its character position in the original document."""

-    def __init__(self, chunk_size: int = 512, overlap: int = 50):
+    text: str
+    start_offset: int  # Character position where chunk starts
+    end_offset: int  # Character position where chunk ends (exclusive)
+    page_number: int | None = None  # Page number for PDF chunks (optional)
+    metadata: dict | None = None  # Additional processor-specific metadata (optional)
+
+
+class DocumentChunker:
+    """Chunk large documents for optimal embedding using LangChain text splitters.
+
+    Uses RecursiveCharacterTextSplitter which preserves semantic boundaries
+    by splitting on sentence and paragraph boundaries before resorting to
+    character-level splitting.
+    """
+
+    def __init__(self, chunk_size: int = 2048, overlap: int = 200):
        """
        Initialize document chunker.

        Args:
-            chunk_size: Number of words per chunk (default: 512)
-            overlap: Number of overlapping words between chunks (default: 50)
+            chunk_size: Number of characters per chunk (default: 2048)
+            overlap: Number of overlapping characters between chunks (default: 200)
        """
        self.chunk_size = chunk_size
        self.overlap = overlap

-    def chunk_text(self, content: str) -> list[str]:
-        """
-        Split text into overlapping chunks.
+        # Initialize LangChain RecursiveCharacterTextSplitter
+        # Uses hierarchical splitting to preserve semantic boundaries:
+        # - Paragraphs (\n\n)
+        # - Sentences (. ! ?)
+        # - Words (spaces)
+        # - Characters (last resort)
+        # This prevents mid-sentence splitting while maintaining semantic coherence
+        self.splitter = RecursiveCharacterTextSplitter(
+            chunk_size=chunk_size,
+            chunk_overlap=overlap,
+            add_start_index=True,  # Enable position tracking
+            strip_whitespace=True,
+        )

-        Uses simple word-based chunking with configurable overlap to preserve
-        context across chunk boundaries.
+    async def chunk_text(self, content: str) -> list[ChunkWithPosition]:
+        """
+        Split text into overlapping chunks with position tracking.
+
+        Uses LangChain's RecursiveCharacterTextSplitter to create chunks that
+        preserve semantic boundaries by splitting at paragraphs and sentences
+        before resorting to word or character-level splitting. This ensures
+        sentences are kept intact. Preserves character positions for each chunk
+        to enable precise document retrieval.

        Args:
            content: Text content to chunk

        Returns:
-            List of text chunks (may be single item if content is small)
+            List of chunks with their character positions in the original content
        """
-        # Simple word-based chunking
-        words = content.split()
+        import anyio

-        if len(words) <= self.chunk_size:
-            return [content]
+        # Handle empty content - return single empty chunk for backward compatibility
+        if not content:
+            return [ChunkWithPosition(text="", start_offset=0, end_offset=0)]

-        chunks = []
-        start = 0
+        # Run CPU-bound text splitting in thread pool to avoid blocking event loop
+        docs = await anyio.to_thread.run_sync(  # type: ignore[attr-defined]
+            self.splitter.create_documents,
+            [content],
+        )

-        while start < len(words):
-            end = start + self.chunk_size
-            chunk_words = words[start:end]
-            chunks.append(" ".join(chunk_words))
-            start = end - self.overlap
+        # Convert LangChain Documents to ChunkWithPosition objects
+        chunks = [
+            ChunkWithPosition(
+                text=doc.page_content,
+                start_offset=doc.metadata.get("start_index", 0),
+                end_offset=doc.metadata.get("start_index", 0) + len(doc.page_content),
+            )
+            for doc in docs
+        ]

-        logger.debug(f"Chunked document into {len(chunks)} chunks ({len(words)} words)")
+        logger.debug(
+            f"Chunked document into {len(chunks)} chunks "
+            f"(chunk_size={self.chunk_size}, overlap={self.overlap})"
+        )
        return chunks
@@ -0,0 +1,140 @@
+"""Custom PCA implementation for dimensionality reduction.
+
+Implements Principal Component Analysis without scikit-learn dependency.
+Used for reducing high-dimensional embeddings (768-dim) to 2D for visualization.
+"""
+
+import logging
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+class PCA:
+    """Principal Component Analysis for dimensionality reduction.
+
+    Simple implementation that finds principal components via eigendecomposition
+    of the covariance matrix. Suitable for small-to-medium datasets.
+
+    Attributes:
+        n_components: Number of principal components to keep
+        mean_: Mean of training data (set during fit)
+        components_: Principal components (eigenvectors)
+        explained_variance_: Variance explained by each component
+        explained_variance_ratio_: Fraction of total variance explained
+    """
+
+    def __init__(self, n_components: int = 2):
+        """Initialize PCA.
+
+        Args:
+            n_components: Number of components to keep (default: 2)
+        """
+        if n_components < 1:
+            raise ValueError(f"n_components must be >= 1, got {n_components}")
+
+        self.n_components = n_components
+        self.mean_: np.ndarray | None = None
+        self.components_: np.ndarray | None = None
+        self.explained_variance_: np.ndarray | None = None
+        self.explained_variance_ratio_: np.ndarray | None = None
+
+    def fit(self, X: np.ndarray) -> "PCA":
+        """Fit PCA model to data.
+
+        Args:
+            X: Training data of shape (n_samples, n_features)
+
+        Returns:
+            self (for method chaining)
+
+        Raises:
+            ValueError: If X has fewer features than n_components
+        """
+        X = np.asarray(X)
+
+        if X.ndim != 2:
+            raise ValueError(f"X must be 2D array, got shape {X.shape}")
+
+        n_samples, n_features = X.shape
+
+        if n_features < self.n_components:
+            raise ValueError(
+                f"n_components={self.n_components} > n_features={n_features}"
+            )
+
+        # Center data
+        self.mean_ = np.mean(X, axis=0)
+        X_centered = X - self.mean_
+
+        # Compute covariance matrix
+        # Use (X^T X) / (n-1) for numerical stability with high-dim data
+        cov = np.cov(X_centered.T)
+
+        # Eigendecomposition
+        eigenvalues, eigenvectors = np.linalg.eigh(cov)
+
+        # Sort by eigenvalue (descending)
+        idx = np.argsort(eigenvalues)[::-1]
+        eigenvalues = eigenvalues[idx]
+        eigenvectors = eigenvectors[:, idx]
+
+        # Keep top n_components
+        self.components_ = eigenvectors[:, : self.n_components].T
+        self.explained_variance_ = eigenvalues[: self.n_components]
+
+        # Calculate explained variance ratio
+        total_variance = np.sum(eigenvalues)
+        if total_variance > 0:
+            self.explained_variance_ratio_ = self.explained_variance_ / total_variance
+        else:
+            self.explained_variance_ratio_ = np.zeros(self.n_components)
+
+        logger.debug(
+            f"PCA fit: {n_samples} samples, {n_features} features → "
+            f"{self.n_components} components, "
+            f"explained variance: {self.explained_variance_ratio_}"
+        )
+
+        return self
+
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        """Transform data to principal component space.
+
+        Args:
+            X: Data to transform of shape (n_samples, n_features)
+
+        Returns:
+            Transformed data of shape (n_samples, n_components)
+
+        Raises:
+            ValueError: If PCA not fitted yet
+        """
+        if self.mean_ is None or self.components_ is None:
+            raise ValueError("PCA not fitted yet. Call fit() first.")
+
+        X = np.asarray(X)
+
+        if X.ndim != 2:
+            raise ValueError(f"X must be 2D array, got shape {X.shape}")
+
+        # Center using training mean
+        X_centered = X - self.mean_
+
+        # Project onto principal components
+        X_transformed = np.dot(X_centered, self.components_.T)
+
+        return X_transformed
+
+    def fit_transform(self, X: np.ndarray) -> np.ndarray:
+        """Fit PCA model and transform data in one step.
+
+        Args:
+            X: Training data of shape (n_samples, n_features)
+
+        Returns:
+            Transformed data of shape (n_samples, n_components)
+        """
+        self.fit(X)
+        return self.transform(X)
@@ -0,0 +1,306 @@
+"""Placeholder point management for Qdrant state tracking.
+
+Placeholders are zero-vector points stored in Qdrant to track document processing
+state. They prevent duplicate work by marking documents as "in-flight" during the
+gap between scanner queuing and processor completion.
+
+Architecture:
+- Scanner writes placeholders when queuing documents for processing
+- Processor deletes placeholders and writes real vectors after processing
+- All user-facing queries filter out placeholders (is_placeholder: False)
+
+Placeholders contain:
+- Zero vectors (dimension from embedding service)
+- is_placeholder: True flag (for filtering)
+- status: "pending", "processing", "completed", "failed"
+- modified_at, etag from source document
+- queued_at timestamp
+"""
+
+import logging
+import time
+import uuid
+
+from qdrant_client.models import FieldCondition, Filter, MatchValue, PointStruct
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.embedding import get_embedding_service
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+def _generate_placeholder_id(doc_type: str, doc_id: str | int) -> str:
+    """Generate deterministic UUID for placeholder point.
+
+    Args:
+        doc_type: Document type (note, file, etc.)
+        doc_id: Document ID
+
+    Returns:
+        UUID string for point ID
+    """
+    point_name = f"{doc_type}:{doc_id}:placeholder"
+    return str(uuid.uuid5(uuid.NAMESPACE_DNS, point_name))
+
+
+async def write_placeholder_point(
+    doc_id: str | int,
+    doc_type: str,
+    user_id: str,
+    modified_at: int,
+    etag: str = "",
+    file_path: str | None = None,
+) -> None:
+    """Write a placeholder point to Qdrant to mark document as queued.
+
+    This should be called by the scanner BEFORE queuing a document for processing.
+    The placeholder prevents duplicate work if the scanner runs again before
+    processing completes.
+
+    Args:
+        doc_id: Document ID (int for notes/files)
+        doc_type: Document type (note, file, etc.)
+        user_id: User ID who owns the document
+        modified_at: Document modification timestamp
+        etag: Document ETag (if available)
+        file_path: File path (for files only)
+
+    Raises:
+        Exception: If Qdrant write fails
+    """
+    try:
+        qdrant_client = await get_qdrant_client()
+        settings = get_settings()
+        embedding_service = get_embedding_service()
+
+        # Get dimension dynamically (never hardcode)
+        dimension = embedding_service.get_dimension()
+
+        # Create zero vectors
+        zero_dense = [0.0] * dimension
+
+        # Create empty sparse vector for placeholders
+        # Use models.SparseVector with empty indices/values
+        from qdrant_client import models
+
+        empty_sparse = models.SparseVector(indices=[], values=[])
+
+        # Generate deterministic point ID
+        point_id = _generate_placeholder_id(doc_type, doc_id)
+
+        # Build payload
+        payload = {
+            "user_id": user_id,
+            "doc_id": doc_id,
+            "doc_type": doc_type,
+            "is_placeholder": True,
+            "status": "pending",
+            "modified_at": modified_at,
+            "etag": etag,
+            "queued_at": int(time.time()),
+        }
+
+        # Add file_path for files
+        if doc_type == "file" and file_path:
+            payload["file_path"] = file_path
+
+        # Create placeholder point
+        point = PointStruct(
+            id=point_id,
+            vector={
+                "dense": zero_dense,
+                "sparse": empty_sparse,  # Empty sparse vector for placeholders
+            },
+            payload=payload,
+        )
+
+        # Upsert to Qdrant
+        await qdrant_client.upsert(
+            collection_name=settings.get_collection_name(),
+            points=[point],
+            wait=True,
+        )
+
+        logger.debug(
+            f"Wrote placeholder for {doc_type}_{doc_id} (user={user_id}, "
+            f"modified_at={modified_at})"
+        )
+
+    except Exception as e:
+        logger.error(
+            f"Failed to write placeholder for {doc_type}_{doc_id}: {e}",
+            exc_info=True,
+        )
+        raise
+
+
+async def query_document_metadata(
+    doc_id: str | int,
+    doc_type: str,
+    user_id: str,
+) -> dict | None:
+    """Query Qdrant for existing document entry (placeholder or real).
+
+    Returns the payload of the first matching point, which could be:
+    - A placeholder (is_placeholder: True)
+    - A real indexed document (is_placeholder: False or missing)
+    - None if document not in Qdrant
+
+    Args:
+        doc_id: Document ID
+        doc_type: Document type
+        user_id: User ID
+
+    Returns:
+        Payload dict if found, None otherwise
+    """
+    try:
+        qdrant_client = await get_qdrant_client()
+        settings = get_settings()
+
+        # Query for any entry matching doc_id, doc_type, user_id
+        scroll_result = await qdrant_client.scroll(
+            collection_name=settings.get_collection_name(),
+            scroll_filter=Filter(
+                must=[
+                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                    FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
+                    FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
+                ]
+            ),
+            limit=1,
+            with_payload=True,
+            with_vectors=False,
+        )
+
+        if scroll_result[0]:
+            point = scroll_result[0][0]
+            return dict(point.payload)
+
+        return None
+
+    except Exception as e:
+        logger.warning(f"Error querying document metadata for {doc_type}_{doc_id}: {e}")
+        return None
+
+
+async def delete_placeholder_point(
+    doc_id: str | int,
+    doc_type: str,
+    user_id: str,
+) -> None:
+    """Delete a placeholder point from Qdrant.
+
+    This should be called by the processor BEFORE writing real vectors.
+    We delete the placeholder to avoid duplicates, then write the real chunks.
+
+    Args:
+        doc_id: Document ID
+        doc_type: Document type
+        user_id: User ID
+
+    Raises:
+        Exception: If Qdrant delete fails
+    """
+    try:
+        qdrant_client = await get_qdrant_client()
+        settings = get_settings()
+
+        # Delete by filter (in case there are multiple chunks from old indexing)
+        await qdrant_client.delete(
+            collection_name=settings.get_collection_name(),
+            points_selector=Filter(
+                must=[
+                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                    FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
+                    FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
+                    FieldCondition(key="is_placeholder", match=MatchValue(value=True)),
+                ]
+            ),
+        )
+
+        logger.debug(f"Deleted placeholder for {doc_type}_{doc_id} (user={user_id})")
+
+    except Exception as e:
+        logger.error(
+            f"Failed to delete placeholder for {doc_type}_{doc_id}: {e}",
+            exc_info=True,
+        )
+        raise
+
+
+async def update_placeholder_status(
+    doc_id: str | int,
+    doc_type: str,
+    user_id: str,
+    status: str,
+) -> None:
+    """Update the status field of a placeholder point.
+
+    Status values:
+    - "pending": Queued for processing
+    - "processing": Currently being processed
+    - "completed": Processing completed successfully
+    - "failed": Processing failed
+
+    Args:
+        doc_id: Document ID
+        doc_type: Document type
+        user_id: User ID
+        status: New status value
+
+    Raises:
+        Exception: If Qdrant update fails
+    """
+    try:
+        qdrant_client = await get_qdrant_client()
+        settings = get_settings()
+
+        # Update payload using set_payload
+        await qdrant_client.set_payload(
+            collection_name=settings.get_collection_name(),
+            payload={"status": status},
+            points=Filter(
+                must=[
+                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                    FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
+                    FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
+                    FieldCondition(key="is_placeholder", match=MatchValue(value=True)),
+                ]
+            ),
+        )
+
+        logger.debug(
+            f"Updated placeholder status for {doc_type}_{doc_id} to '{status}' "
+            f"(user={user_id})"
+        )
+
+    except Exception as e:
+        logger.warning(
+            f"Failed to update placeholder status for {doc_type}_{doc_id}: {e}"
+        )
+        # Don't raise - status updates are non-critical
+
+
+def get_placeholder_filter() -> FieldCondition:
+    """Get a filter condition to exclude placeholders from queries.
+
+    Add this to all user-facing search/visualization queries to ensure
+    placeholders are never returned to users.
+
+    Returns:
+        FieldCondition that filters out is_placeholder: True
+
+    Example:
+        Filter(
+            must=[
+                get_placeholder_filter(),  # Exclude placeholders
+                FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+            ]
+        )
+    """
+    return FieldCondition(
+        key="is_placeholder",
+        match=MatchValue(value=False),
+    )
@@ -8,13 +8,14 @@ import time
 import uuid

 import anyio
+from anyio.abc import TaskStatus
 from anyio.streams.memory import MemoryObjectReceiveStream
 from httpx import HTTPStatusError
 from qdrant_client.models import FieldCondition, Filter, MatchValue, PointStruct

 from nextcloud_mcp_server.client import NextcloudClient
 from nextcloud_mcp_server.config import get_settings
-from nextcloud_mcp_server.embedding import get_embedding_service
+from nextcloud_mcp_server.embedding import get_bm25_service, get_embedding_service
 from nextcloud_mcp_server.observability.metrics import (
    record_qdrant_operation,
    record_vector_sync_processing,
@@ -22,18 +23,58 @@ from nextcloud_mcp_server.observability.metrics import (
 )
 from nextcloud_mcp_server.observability.tracing import trace_operation
 from nextcloud_mcp_server.vector.document_chunker import DocumentChunker
+from nextcloud_mcp_server.vector.placeholder import delete_placeholder_point
 from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
 from nextcloud_mcp_server.vector.scanner import DocumentTask

 logger = logging.getLogger(__name__)


+def assign_page_numbers(chunks, page_boundaries):
+    """Assign page numbers to chunks based on page boundaries.
+
+    Each chunk gets the page number where most of its content appears.
+    For chunks spanning multiple pages, assigns the page containing the
+    majority of the chunk's characters.
+
+    Args:
+        chunks: List of ChunkWithPosition objects
+        page_boundaries: List of dicts with {page, start_offset, end_offset}
+
+    Returns:
+        None (modifies chunks in place)
+    """
+    if not page_boundaries:
+        return
+
+    for chunk in chunks:
+        # Find which page(s) this chunk overlaps with
+        max_overlap = 0
+        assigned_page = None
+
+        for boundary in page_boundaries:
+            # Calculate overlap between chunk and page
+            overlap_start = max(chunk.start_offset, boundary["start_offset"])
+            overlap_end = min(chunk.end_offset, boundary["end_offset"])
+            overlap = max(0, overlap_end - overlap_start)
+
+            # Assign to page with maximum overlap
+            if overlap > max_overlap:
+                max_overlap = overlap
+                assigned_page = boundary["page"]
+
+        if assigned_page is not None:
+            chunk.page_number = assigned_page
+
+
 async def processor_task(
    worker_id: int,
    receive_stream: MemoryObjectReceiveStream[DocumentTask],
    shutdown_event: anyio.Event,
    nc_client: NextcloudClient,
    user_id: str,
+    *,
+    task_status: TaskStatus = anyio.TASK_STATUS_IGNORED,
 ):
    """
    Process documents from stream concurrently.
@@ -53,9 +94,13 @@ async def processor_task(
        shutdown_event: Event signaling shutdown
        nc_client: Authenticated Nextcloud client
        user_id: User being processed
+        task_status: Status object for signaling task readiness
    """
    logger.info(f"Processor {worker_id} started")

+    # Signal that the task has started and is ready
+    task_status.started()
+
    while not shutdown_event.is_set():
        try:
            # Get document with timeout (allows checking shutdown)
@@ -211,30 +256,273 @@ async def _index_document(
    settings = get_settings()

    # Fetch document content
-    if doc_task.doc_type == "note":
-        document = await nc_client.notes.get_note(int(doc_task.doc_id))
-        content = f"{document['title']}\n\n{document['content']}"
-        title = document["title"]
-        etag = document.get("etag", "")
-    else:
-        raise ValueError(f"Unsupported doc_type: {doc_task.doc_type}")
+    with trace_operation(
+        "vector_sync.fetch_content",
+        attributes={
+            "vector_sync.doc_type": doc_task.doc_type,
+            "vector_sync.doc_id": doc_task.doc_id,
+        },
+    ):
+        if doc_task.doc_type == "note":
+            document = await nc_client.notes.get_note(int(doc_task.doc_id))
+            content = f"{document['title']}\n\n{document['content']}"
+            title = document["title"]
+            etag = document.get("etag", "")
+            file_metadata = {}  # No file-specific metadata for notes
+            file_path = None  # Notes don't have file paths
+            content_bytes = None  # Notes don't have binary content
+            content_type = None
+        elif doc_task.doc_type == "file":
+            # For files, doc_id is now the numeric file ID, file_path comes from DocumentTask
+            if not doc_task.file_path:
+                raise ValueError(
+                    f"File path required for file indexing but not provided (file_id={doc_task.doc_id})"
+                )
+            file_path = doc_task.file_path
+
+            # Read file content via WebDAV
+            content_bytes, content_type = await nc_client.webdav.read_file(file_path)
+        else:
+            raise ValueError(f"Unsupported doc_type: {doc_task.doc_type}")
+
+    # Process file content (text extraction)
+    if doc_task.doc_type == "file":
+        # Type narrowing: content_bytes and content_type are set for files
+        assert content_bytes is not None
+        assert content_type is not None
+        assert file_path is not None
+
+        with trace_operation(
+            "vector_sync.document_process",
+            attributes={
+                "vector_sync.content_type": content_type,
+                "vector_sync.file_size": len(content_bytes),
+            },
+        ):
+            # Use document processor registry to extract text
+            from nextcloud_mcp_server.document_processors import get_registry
+
+            registry = get_registry()
+
+            try:
+                result = await registry.process(
+                    content=content_bytes,
+                    content_type=content_type,
+                    filename=file_path,
+                )
+                content = result.text
+                file_metadata = result.metadata
+                title = file_metadata.get("title") or file_path.split("/")[-1]
+                etag = ""  # WebDAV read_file doesn't return etag
+
+                # Diagnostic: Log page boundary information if available
+                if "page_boundaries" in file_metadata:
+                    page_boundaries = file_metadata["page_boundaries"]
+                    logger.info(
+                        f"Page boundaries for {file_path}: "
+                        f"{len(page_boundaries)} pages, text length: {len(content)}"
+                    )
+                    # Log first 3 page boundaries for debugging
+                    for boundary in page_boundaries[:3]:
+                        logger.debug(
+                            f"  Page {boundary['page']}: "
+                            f"offsets [{boundary['start_offset']}:{boundary['end_offset']}]"
+                        )
+                    # Verify last boundary matches text length
+                    if page_boundaries:
+                        last_boundary = page_boundaries[-1]
+                        if last_boundary["end_offset"] != len(content):
+                            logger.warning(
+                                f"Text length mismatch: content={len(content)}, "
+                                f"last_boundary_end={last_boundary['end_offset']}"
+                            )
+                else:
+                    logger.debug(f"No page_boundaries in metadata for {file_path}")
+            except Exception as e:
+                logger.error(f"Failed to process file {file_path}: {e}")
+                raise

    # Tokenize and chunk (using configured chunk size and overlap)
-    chunker = DocumentChunker(
-        chunk_size=settings.document_chunk_size,
-        overlap=settings.document_chunk_overlap,
-    )
-    chunks = chunker.chunk_text(content)
+    with trace_operation(
+        "vector_sync.chunk_text",
+        attributes={
+            "vector_sync.input_chars": len(content),
+            "vector_sync.chunk_size": settings.document_chunk_size,
+            "vector_sync.overlap": settings.document_chunk_overlap,
+        },
+    ):
+        chunker = DocumentChunker(
+            chunk_size=settings.document_chunk_size,
+            overlap=settings.document_chunk_overlap,
+        )
+        chunks = await chunker.chunk_text(content)

-    # Generate embeddings (I/O bound - external API call)
-    embedding_service = get_embedding_service()
-    embeddings = await embedding_service.embed_batch(chunks)
+    # Assign page numbers to chunks if page boundaries are available (PDFs)
+    if doc_task.doc_type == "file" and "page_boundaries" in file_metadata:
+        with trace_operation(
+            "vector_sync.assign_page_numbers",
+            attributes={
+                "vector_sync.chunk_count": len(chunks),
+                "vector_sync.page_count": len(file_metadata["page_boundaries"]),
+            },
+        ):
+            # Annotates the chunk objects in place; the diagnostics below read
+            # chunk.page_number afterwards to see what was assigned.
+            assign_page_numbers(chunks, file_metadata["page_boundaries"])
+
+            # Diagnostic: Verify page number assignment
+            assigned_count = sum(1 for c in chunks if c.page_number is not None)
+            logger.info(
+                f"Assigned page numbers to {assigned_count}/{len(chunks)} chunks "
+                f"for {file_path}"
+            )
+
+            # Log first 3 chunks to see their page assignments
+            for i, chunk in enumerate(chunks[:3]):
+                logger.debug(
+                    f"  Chunk {i}: page={chunk.page_number}, "
+                    f"offsets=[{chunk.start_offset}:{chunk.end_offset}]"
+                )
+
+            # Warning if NO page numbers were assigned
+            if assigned_count == 0:
+                # assigned_count is also 0 when the chunker produced no chunks
+                # at all. Guard the first/last lookups so that this purely
+                # diagnostic message cannot raise IndexError and abort indexing.
+                chunk_offset_range = (
+                    f"[{chunks[0].start_offset}:{chunks[-1].end_offset}]"
+                    if chunks
+                    else "[]"
+                )
+                logger.warning(
+                    f"NO page numbers assigned! "
+                    f"Text length: {len(content)}, "
+                    f"Chunks: {len(chunks)}, "
+                    f"Chunk offset range: {chunk_offset_range}, "
+                    f"Page boundaries: {len(file_metadata['page_boundaries'])} pages, "
+                    f"First boundary: {file_metadata['page_boundaries'][0] if file_metadata['page_boundaries'] else 'None'}"
+                )
+
+    # Extract chunk texts for embedding
+    chunk_texts = [chunk.text for chunk in chunks]
+
+    # Initialize results containers
+    dense_embeddings: list = []
+    sparse_embeddings: list = []
+    chunk_images: dict[int, dict] = {}
+
+    # Determine if we need PDF highlighting
+    is_pdf = doc_task.doc_type == "file" and content_type == "application/pdf"
+
+    # Define async tasks for parallel execution
+    async def generate_dense_embeddings():
+        """Embed all chunk texts with the dense embedding service.
+
+        I/O bound (external API call). The result is written to the enclosing
+        function's ``dense_embeddings`` variable rather than returned, so it can
+        run as a task-group child.
+        """
+        nonlocal dense_embeddings
+        span_attributes = {
+            "vector_sync.chunk_count": len(chunk_texts),
+            "vector_sync.total_chars": sum(map(len, chunk_texts)),
+        }
+        with trace_operation("vector_sync.embed_dense", attributes=span_attributes):
+            embedder = get_embedding_service()
+            dense_embeddings = await embedder.embed_batch(chunk_texts)
+
+    async def generate_sparse_embeddings():
+        """Encode all chunk texts as sparse vectors (BM25 for keyword matching).
+
+        The result is written to the enclosing function's ``sparse_embeddings``
+        variable rather than returned, so it can run as a task-group child.
+        """
+        nonlocal sparse_embeddings
+        span_attributes = {"vector_sync.chunk_count": len(chunk_texts)}
+        with trace_operation("vector_sync.embed_sparse", attributes=span_attributes):
+            encoder = get_bm25_service()
+            sparse_embeddings = await encoder.encode_batch(chunk_texts)
+
+    async def generate_highlights():
+        """Render highlighted page images for PDF chunks (CPU-bound).
+
+        Returns immediately for non-PDF documents, and skips rendering when the
+        file metadata carries no page boundaries. Rendered images are stored,
+        base64-encoded, in the enclosing function's ``chunk_images`` dict keyed
+        by chunk index.
+        """
+        nonlocal chunk_images
+        if not is_pdf:
+            return
+
+        # Type narrowing: content_bytes is set for PDF files
+        assert content_bytes is not None
+
+        span_attributes = {
+            "vector_sync.chunk_count": len(chunks),
+            "vector_sync.pdf_size": len(content_bytes),
+        }
+        with trace_operation(
+            "vector_sync.generate_highlights", attributes=span_attributes
+        ):
+            import base64
+
+            from nextcloud_mcp_server.search.pdf_highlighter import PDFHighlighter
+
+            # Collect the chunks that have a page number, one tuple each:
+            # (chunk_index, start_offset, end_offset, page_number, chunk_text)
+            chunk_data: list[tuple[int, int, int, int | None, str]] = []
+            for idx, piece in enumerate(chunks):
+                if piece.page_number is None:
+                    continue
+                chunk_data.append(
+                    (
+                        idx,
+                        piece.start_offset,
+                        piece.end_offset,
+                        piece.page_number,
+                        piece.text,
+                    )
+                )
+
+            # Page boundaries were pre-computed by the document processor
+            page_boundaries = file_metadata.get("page_boundaries")
+            if not page_boundaries:
+                logger.warning("No page boundaries available, skipping highlighting")
+                return
+
+            logger.info(
+                f"Batch generating highlighted page images for {len(chunk_data)} PDF chunks"
+            )
+
+            def _render_batch():
+                # Reuse the pre-computed page boundaries and full text so the
+                # PDF does not have to be re-processed.
+                return PDFHighlighter.highlight_chunks_batch(
+                    pdf_bytes=content_bytes,
+                    chunks=chunk_data,
+                    page_boundaries=page_boundaries,
+                    full_text=content,
+                    color="yellow",
+                    zoom=2.0,
+                )
+
+            # Run the CPU-bound rendering in a worker thread
+            batch_results = await anyio.to_thread.run_sync(_render_batch)  # type: ignore[attr-defined]
+
+            # Convert results to storage format
+            for chunk_index, rendered in batch_results.items():
+                png_bytes, actual_page_num, highlight_count = rendered
+                chunk_images[chunk_index] = {
+                    "image": base64.b64encode(png_bytes).decode("utf-8"),
+                    "page": actual_page_num,
+                    "highlights": highlight_count,
+                    "size": len(png_bytes),
+                }
+
+            # max(..., 1) keeps the average well-defined when nothing was rendered
+            total_image_bytes = sum(img["size"] for img in chunk_images.values())
+            average_image_bytes = total_image_bytes // max(len(chunk_images), 1)
+            logger.info(
+                f"Generated {len(chunk_images)}/{len(chunks)} highlighted page images "
+                f"(avg {average_image_bytes:,} bytes)"
+            )
+
+    # Run all embedding/highlighting operations in parallel
+    # - Dense embeddings: I/O bound (API call)
+    # - Sparse embeddings: CPU bound (local BM25)
+    # - Highlighting: CPU bound (PyMuPDF rendering, runs in thread pool)
+    with trace_operation(
+        "vector_sync.parallel_processing",
+        attributes={
+            "vector_sync.is_pdf": is_pdf,
+            "vector_sync.chunk_count": len(chunks),
+        },
+    ):
+        async with anyio.create_task_group() as tg:
+            tg.start_soon(generate_dense_embeddings)
+            tg.start_soon(generate_sparse_embeddings)
+            tg.start_soon(generate_highlights)

    # Prepare Qdrant points
    indexed_at = int(time.time())
    points = []

-    for i, (chunk, embedding) in enumerate(zip(chunks, embeddings)):
+    for i, (chunk, dense_emb, sparse_emb) in enumerate(
+        zip(chunks, dense_embeddings, sparse_embeddings)
+    ):
        # Generate deterministic UUID for point ID
        # Using uuid5 with DNS namespace and combining doc info
        point_name = f"{doc_task.doc_type}:{doc_task.doc_id}:chunk:{i}"
@@ -243,28 +531,93 @@ async def _index_document(
        points.append(
            PointStruct(
                id=point_id,
-                vector=embedding,
+                vector={
+                    "dense": dense_emb,
+                    "sparse": sparse_emb,
+                },
                payload={
                    "user_id": doc_task.user_id,
                    "doc_id": doc_task.doc_id,
                    "doc_type": doc_task.doc_type,
+                    "is_placeholder": False,  # Real indexed document (not placeholder)
                    "title": title,
-                    "excerpt": chunk[:200],
+                    "excerpt": chunk.text,  # Full chunk text (up to chunk_size, default 2048 chars)
                    "indexed_at": indexed_at,
                    "modified_at": doc_task.modified_at,
                    "etag": etag,
                    "chunk_index": i,
                    "total_chunks": len(chunks),
+                    "chunk_start_offset": chunk.start_offset,
+                    "chunk_end_offset": chunk.end_offset,
+                    "metadata_version": 2,  # v2 includes position metadata
+                    # File-specific metadata (PDF, etc.)
+                    **(
+                        {
+                            "file_path": file_path,  # Store file path for retrieval
+                            "mime_type": content_type,  # From WebDAV response
+                            "file_size": file_metadata.get("file_size"),
+                            "page_number": chunk.page_number,
+                            "page_count": file_metadata.get("page_count"),
+                            "author": file_metadata.get("author"),
+                            "creation_date": file_metadata.get("creation_date"),
+                            "has_images": file_metadata.get("has_images", False),
+                            "image_count": file_metadata.get("image_count", 0),
+                        }
+                        if doc_task.doc_type == "file"
+                        else {}
+                    ),
+                    # Highlighted page image (PDF only)
+                    **(
+                        {
+                            "highlighted_page_image": chunk_images[i]["image"],
+                            "highlighted_page_number": chunk_images[i]["page"],
+                            "highlight_count": chunk_images[i]["highlights"],
+                        }
+                        if i in chunk_images
+                        else {}
+                    ),
                },
            )
        )

-    # Upsert to Qdrant
-    await qdrant_client.upsert(
-        collection_name=settings.get_collection_name(),
-        points=points,
-        wait=True,
-    )
+    # Delete placeholder before writing real vectors
+    # This prevents duplicates and cleans up the placeholder state
+    try:
+        await delete_placeholder_point(
+            doc_id=doc_task.doc_id,
+            doc_type=doc_task.doc_type,
+            user_id=doc_task.user_id,
+        )
+    except Exception as e:
+        # Log but don't fail indexing if placeholder deletion fails
+        logger.warning(
+            f"Failed to delete placeholder for {doc_task.doc_type}_{doc_task.doc_id}: {e}"
+        )
+
+    # Upsert to Qdrant in batches to avoid timeout with large payloads
+    # Each batch is limited to avoid WriteTimeout when sending large image payloads
+    BATCH_SIZE = 10  # ~2MB per batch with images
+    with trace_operation(
+        "vector_sync.qdrant_upsert",
+        attributes={
+            "vector_sync.point_count": len(points),
+            "vector_sync.collection": settings.get_collection_name(),
+            "vector_sync.images_count": len(chunk_images),
+            "vector_sync.batch_size": BATCH_SIZE,
+        },
+    ):
+        for batch_start in range(0, len(points), BATCH_SIZE):
+            batch_end = min(batch_start + BATCH_SIZE, len(points))
+            batch = points[batch_start:batch_end]
+            await qdrant_client.upsert(
+                collection_name=settings.get_collection_name(),
+                points=batch,
+                wait=True,
+            )
+            if batch_end < len(points):
+                logger.debug(
+                    f"Upserted batch {batch_start // BATCH_SIZE + 1}/{(len(points) + BATCH_SIZE - 1) // BATCH_SIZE}"
+                )

    logger.info(
        f"Indexed {doc_task.doc_type}_{doc_task.doc_id} for {doc_task.user_id} "
@@ -2,7 +2,7 @@

 import logging

-from qdrant_client import AsyncQdrantClient
+from qdrant_client import AsyncQdrantClient, models
 from qdrant_client.models import Distance, VectorParams

 from nextcloud_mcp_server.config import get_settings
@@ -84,45 +84,62 @@ async def get_qdrant_client() -> AsyncQdrantClient:
                f"Collection '{collection_name}' found, validating dimensions..."
            )
            collection_info = await _qdrant_client.get_collection(collection_name)
-            actual_dimension = collection_info.config.params.vectors.size
+            # Handle both named vectors (dict) and legacy single vector
+            vectors = collection_info.config.params.vectors
+            if isinstance(vectors, dict):
+                actual_dimension = vectors["dense"].size
+            else:
+                actual_dimension = vectors.size

            # Validate dimension matches
            if actual_dimension != expected_dimension:
+                embedding_model = settings.get_embedding_model_name()
                raise ValueError(
                    f"Dimension mismatch for collection '{collection_name}':\n"
-                    f"  Expected: {expected_dimension} (from embedding model '{settings.ollama_embedding_model}')\n"
+                    f"  Expected: {expected_dimension} (from embedding model '{embedding_model}')\n"
                    f"  Found: {actual_dimension}\n"
                    f"This usually means you changed the embedding model.\n"
                    f"Solutions:\n"
                    f"  1. Delete the old collection: Collection will be recreated with new dimensions\n"
                    f"  2. Set QDRANT_COLLECTION to use a different collection name\n"
-                    f"  3. Revert OLLAMA_EMBEDDING_MODEL to the original model"
+                    f"  3. Revert to the original embedding model"
                )

            logger.info(
                f"Using existing Qdrant collection: {collection_name} "
-                f"(dimension={actual_dimension}, model={settings.ollama_embedding_model})"
+                f"(dimension={actual_dimension}, model={settings.get_embedding_model_name()})"
            )

        else:
            # Collection doesn't exist - create it
+            embedding_model = settings.get_embedding_model_name()
            logger.info(
                f"Collection '{collection_name}' not found, creating with "
-                f"dimension={expected_dimension}, model={settings.ollama_embedding_model}..."
+                f"dimension={expected_dimension}, model={embedding_model}..."
            )
            await _qdrant_client.create_collection(
                collection_name=collection_name,
-                vectors_config=VectorParams(
-                    size=expected_dimension,
-                    distance=Distance.COSINE,
-                ),
+                vectors_config={
+                    "dense": VectorParams(
+                        size=expected_dimension,
+                        distance=Distance.COSINE,
+                    ),
+                },
+                sparse_vectors_config={
+                    "sparse": models.SparseVectorParams(
+                        index=models.SparseIndexParams(
+                            on_disk=False,
+                        )
+                    ),
+                },
            )
            logger.info(
                f"Created Qdrant collection: {collection_name}\n"
-                f"  Dimension: {expected_dimension}\n"
-                f"  Model: {settings.ollama_embedding_model}\n"
+                f"  Dense vector dimension: {expected_dimension}\n"
+                f"  Dense embedding model: {embedding_model}\n"
+                f"  Sparse vectors: BM25 (for hybrid search)\n"
                f"  Distance: COSINE\n"
-                f"Background sync will index all documents with this embedding model."
+                f"Background sync will index all documents with dense + sparse vectors."
            )

    return _qdrant_client
@@ -4,10 +4,12 @@ Periodically scans enabled users' content and queues changed documents for proce
 """

 import logging
+import os
 import time
 from dataclasses import dataclass

 import anyio
+from anyio.abc import TaskStatus
 from anyio.streams.memory import MemoryObjectSendStream
 from qdrant_client.models import FieldCondition, Filter, MatchValue

@@ -15,6 +17,10 @@ from nextcloud_mcp_server.client import NextcloudClient
 from nextcloud_mcp_server.config import get_settings
 from nextcloud_mcp_server.observability.metrics import record_vector_sync_scan
 from nextcloud_mcp_server.observability.tracing import trace_operation
+from nextcloud_mcp_server.vector.placeholder import (
+    query_document_metadata,
+    write_placeholder_point,
+)
 from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

 logger = logging.getLogger(__name__)
@@ -25,10 +31,11 @@ class DocumentTask:
    """Document task for processing queue."""

    user_id: str
-    doc_id: str
+    doc_id: int | str  # int for files/notes, str for legacy
    doc_type: str  # "note", "file", "calendar"
    operation: str  # "index" or "delete"
    modified_at: int
+    file_path: str | None = None  # File path for files (when doc_id is file_id)


 # Track documents potentially deleted (grace period before actual deletion)
@@ -93,6 +100,8 @@ async def scanner_task(
    wake_event: anyio.Event,
    nc_client: NextcloudClient,
    user_id: str,
+    *,
+    task_status: TaskStatus = anyio.TASK_STATUS_IGNORED,
 ):
    """
    Periodic scanner that detects changed documents for enabled user.
@@ -105,10 +114,14 @@ async def scanner_task(
        wake_event: Event to trigger immediate scan
        nc_client: Authenticated Nextcloud client
        user_id: User to scan
+        task_status: Status object for signaling task readiness
    """
    logger.info(f"Scanner task started for user: {user_id}")
    settings = get_settings()

+    # Signal that the task has started and is ready
+    task_status.started()
+
    async with send_stream:
        while not shutdown_event.is_set():
            try:
@@ -175,73 +188,48 @@ async def scan_user_documents(
                f"[SCAN-{scan_id}] Using pruneBefore={prune_before} to optimize data transfer"
            )

-        # Fetch all notes from Nextcloud
-        notes = [
-            note
-            async for note in nc_client.notes.get_all_notes(prune_before=prune_before)
-        ]
-        logger.info(f"[SCAN-{scan_id}] Found {len(notes)} notes for {user_id}")
+        # For deletion tracking, get all doc_ids in Qdrant (for incremental sync)
+        # Note: We no longer bulk-query indexed_at, instead check per-document
+        indexed_doc_ids = set()
+        if not initial_sync:
+            qdrant_client = await get_qdrant_client()
+            scroll_result = await qdrant_client.scroll(
+                collection_name=get_settings().get_collection_name(),
+                scroll_filter=Filter(
+                    must=[
+                        FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                        FieldCondition(key="doc_type", match=MatchValue(value="note")),
+                    ]
+                ),
+                with_payload=["doc_id"],
+                with_vectors=False,
+                limit=10000,
+            )

-        # Record documents scanned
-        record_vector_sync_scan(len(notes))
+            indexed_doc_ids = {point.payload["doc_id"] for point in scroll_result[0]}

-        if initial_sync:
-            # Send everything on first sync
-            for note in notes:
-                modified_at = note.get("modified", 0)
-                await send_stream.send(
-                    DocumentTask(
-                        user_id=user_id,
-                        doc_id=str(note["id"]),
-                        doc_type="note",
-                        operation="index",
-                        modified_at=modified_at,
-                    )
-                )
-            logger.info(f"Sent {len(notes)} documents for initial sync: {user_id}")
-            return
+            logger.debug(f"Found {len(indexed_doc_ids)} indexed documents in Qdrant")

-        # Get indexed state from Qdrant
-        qdrant_client = await get_qdrant_client()
-        scroll_result = await qdrant_client.scroll(
-            collection_name=get_settings().get_collection_name(),
-            scroll_filter=Filter(
-                must=[
-                    FieldCondition(key="user_id", match=MatchValue(value=user_id)),
-                    FieldCondition(key="doc_type", match=MatchValue(value="note")),
-                ]
-            ),
-            with_payload=["doc_id", "indexed_at"],
-            with_vectors=False,
-            limit=10000,
-        )
-
-        indexed_docs = {
-            point.payload["doc_id"]: point.payload["indexed_at"]
-            for point in scroll_result[0]
-        }
-
-        logger.debug(f"Found {len(indexed_docs)} indexed documents in Qdrant")
-
-        # Compare and queue changes
+        # Stream notes from Nextcloud and process immediately
+        note_count = 0
        queued = 0
-        nextcloud_doc_ids = {str(note["id"]) for note in notes}
+        nextcloud_doc_ids = set()

-        for note in notes:
+        async for note in nc_client.notes.get_all_notes(prune_before=prune_before):
+            note_count += 1
            doc_id = str(note["id"])
-            indexed_at = indexed_docs.get(doc_id)
+            nextcloud_doc_ids.add(doc_id)
            modified_at = note.get("modified", 0)

-            # If document reappeared, remove from potentially_deleted
-            doc_key = (user_id, doc_id)
-            if doc_key in _potentially_deleted:
-                logger.debug(
-                    f"Document {doc_id} reappeared, removing from deletion grace period"
+            if initial_sync:
+                # Send everything on first sync - write placeholder first
+                await write_placeholder_point(
+                    doc_id=doc_id,
+                    doc_type="note",
+                    user_id=user_id,
+                    modified_at=modified_at,
+                    etag=note.get("etag", ""),
                )
-                del _potentially_deleted[doc_key]
-
-            # Send if never indexed or modified since last index
-            if indexed_at is None or modified_at > indexed_at:
                await send_stream.send(
                    DocumentTask(
                        user_id=user_id,
@@ -252,6 +240,76 @@ async def scan_user_documents(
                    )
                )
                queued += 1
+            else:
+                # Incremental sync: check if document exists and compare modified_at
+                # If document reappeared, remove from potentially_deleted
+                doc_key = (user_id, doc_id)
+                if doc_key in _potentially_deleted:
+                    logger.debug(
+                        f"Document {doc_id} reappeared, removing from deletion grace period"
+                    )
+                    del _potentially_deleted[doc_key]
+
+                # Query Qdrant for existing entry (placeholder or real)
+                existing_metadata = await query_document_metadata(
+                    doc_id=doc_id, doc_type="note", user_id=user_id
+                )
+
+                # Send if never indexed or modified since last index
+                # Compare against stored modified_at (not indexed_at!)
+                needs_indexing = False
+                if existing_metadata is None:
+                    # Never seen before
+                    needs_indexing = True
+                elif existing_metadata.get("modified_at", 0) < modified_at:
+                    # Document modified since last indexing
+                    needs_indexing = True
+                elif existing_metadata.get("is_placeholder", False):
+                    # Placeholder exists - check if it's stale (processing may have failed)
+                    # Only requeue if placeholder is older than 5x scan interval
+                    # (Large PDFs can take 3-4 minutes to process)
+                    queued_at = existing_metadata.get("queued_at", 0)
+                    placeholder_age = time.time() - queued_at
+                    stale_threshold = get_settings().vector_sync_scan_interval * 5
+                    if placeholder_age > stale_threshold:
+                        logger.debug(
+                            f"Found stale placeholder for note {doc_id} "
+                            f"(age={placeholder_age:.1f}s), requeuing"
+                        )
+                        needs_indexing = True
+                    else:
+                        logger.debug(
+                            f"Skipping note {doc_id} with recent placeholder "
+                            f"(age={placeholder_age:.1f}s < {stale_threshold:.1f}s)"
+                        )
+
+                if needs_indexing:
+                    # Write placeholder before queuing
+                    await write_placeholder_point(
+                        doc_id=doc_id,
+                        doc_type="note",
+                        user_id=user_id,
+                        modified_at=modified_at,
+                        etag=note.get("etag", ""),
+                    )
+                    await send_stream.send(
+                        DocumentTask(
+                            user_id=user_id,
+                            doc_id=doc_id,
+                            doc_type="note",
+                            operation="index",
+                            modified_at=modified_at,
+                        )
+                    )
+                    queued += 1
+
+        # Log and record metrics after streaming
+        logger.info(f"[SCAN-{scan_id}] Found {note_count} notes for {user_id}")
+        record_vector_sync_scan(note_count)
+
+        if initial_sync:
+            logger.info(f"Sent {queued} documents for initial sync: {user_id}")
+            return

        # Check for deleted documents (in Qdrant but not in Nextcloud)
        # Use grace period: only delete after 2 consecutive scans confirm absence
@@ -261,7 +319,7 @@ async def scan_user_documents(
        )  # Allow 1.5 scan intervals
        current_time = time.time()

-        for doc_id in indexed_docs:
+        for doc_id in indexed_doc_ids:
            if doc_id not in nextcloud_doc_ids:
                doc_key = (user_id, doc_id)

@@ -300,7 +358,195 @@ async def scan_user_documents(
                    )
                    _potentially_deleted[doc_key] = current_time

+        # Scan tagged PDF files (after notes)
+        # Get indexed file IDs from Qdrant (for deletion tracking)
+        indexed_file_ids = set()
+        if not initial_sync:
+            file_scroll_result = await qdrant_client.scroll(
+                collection_name=settings.get_collection_name(),
+                scroll_filter=Filter(
+                    must=[
+                        FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                        FieldCondition(key="doc_type", match=MatchValue(value="file")),
+                    ]
+                ),
+                limit=10000,  # Reasonable limit for file count
+                with_payload=["doc_id"],
+                with_vectors=False,
+            )
+
+            indexed_file_ids = {
+                point.payload["doc_id"] for point in file_scroll_result[0]
+            }
+
+            logger.debug(f"Found {len(indexed_file_ids)} indexed files in Qdrant")
+
+        # Scan for tagged PDF files
+        file_count = 0
+        file_queued = 0
+        nextcloud_file_ids = set()
+
+        try:
+            # Find files with vector-index tag using OCS Tags API
+            settings = get_settings()
+            tag_name = os.getenv("VECTOR_SYNC_PDF_TAG", "vector-index")
+            # Use NextcloudClient.find_files_by_tag() which uses proper OCS API
+            # and filters by PDF MIME type
+            tagged_files = await nc_client.find_files_by_tag(
+                tag_name, mime_type_filter="application/pdf"
+            )
+
+            for file_info in tagged_files:
+                # Files are already filtered by MIME type in find_files_by_tag()
+                file_count += 1
+                file_id = file_info["id"]  # Use numeric file ID, not path
+                file_path = file_info["path"]  # Keep path for logging
+                nextcloud_file_ids.add(file_id)
+
+                # Use last_modified timestamp if available, otherwise use current time
+                modified_at = file_info.get("last_modified_timestamp", int(time.time()))
+                if isinstance(file_info.get("last_modified"), str):
+                    # Parse RFC 2822 date format if needed
+                    from email.utils import parsedate_to_datetime
+
+                    try:
+                        dt = parsedate_to_datetime(file_info["last_modified"])
+                        modified_at = int(dt.timestamp())
+                    except (ValueError, KeyError):
+                        pass
+
+                if initial_sync:
+                    # Send everything on first sync - write placeholder first
+                    await write_placeholder_point(
+                        doc_id=file_id,
+                        doc_type="file",
+                        user_id=user_id,
+                        modified_at=modified_at,
+                        file_path=file_path,
+                    )
+                    await send_stream.send(
+                        DocumentTask(
+                            user_id=user_id,
+                            doc_id=file_id,  # Use numeric file ID
+                            doc_type="file",
+                            operation="index",
+                            modified_at=modified_at,
+                            file_path=file_path,  # Pass file path for content retrieval
+                        )
+                    )
+                    file_queued += 1
+                else:
+                    # Incremental sync: check if file exists and compare modified_at
+                    # If file reappeared, remove from potentially_deleted
+                    file_key = (user_id, file_id)
+                    if file_key in _potentially_deleted:
+                        logger.debug(
+                            f"File {file_path} (ID: {file_id}) reappeared, removing from deletion grace period"
+                        )
+                        del _potentially_deleted[file_key]
+
+                    # Query Qdrant for existing entry (placeholder or real)
+                    existing_metadata = await query_document_metadata(
+                        doc_id=file_id, doc_type="file", user_id=user_id
+                    )
+
+                    # Send if never indexed or modified since last index
+                    # Compare against stored modified_at (not indexed_at!)
+                    needs_indexing = False
+                    if existing_metadata is None:
+                        # Never seen before
+                        needs_indexing = True
+                    elif existing_metadata.get("modified_at", 0) < modified_at:
+                        # File modified since last indexing
+                        needs_indexing = True
+                    elif existing_metadata.get("is_placeholder", False):
+                        # Placeholder exists - check if it's stale (processing may have failed)
+                        # Only requeue if placeholder is older than 5x scan interval
+                        # (Large PDFs can take 3-4 minutes to process)
+                        queued_at = existing_metadata.get("queued_at", 0)
+                        placeholder_age = time.time() - queued_at
+                        stale_threshold = get_settings().vector_sync_scan_interval * 5
+                        if placeholder_age > stale_threshold:
+                            logger.debug(
+                                f"Found stale placeholder for file {file_path} (ID: {file_id}) "
+                                f"(age={placeholder_age:.1f}s), requeuing"
+                            )
+                            needs_indexing = True
+                        else:
+                            logger.debug(
+                                f"Skipping file {file_path} (ID: {file_id}) with recent placeholder "
+                                f"(age={placeholder_age:.1f}s < {stale_threshold:.1f}s)"
+                            )
+
+                    if needs_indexing:
+                        # Write placeholder before queuing
+                        await write_placeholder_point(
+                            doc_id=file_id,
+                            doc_type="file",
+                            user_id=user_id,
+                            modified_at=modified_at,
+                            file_path=file_path,
+                        )
+                        await send_stream.send(
+                            DocumentTask(
+                                user_id=user_id,
+                                doc_id=file_id,  # Use numeric file ID
+                                doc_type="file",
+                                operation="index",
+                                modified_at=modified_at,
+                                file_path=file_path,  # Pass file path for content retrieval
+                            )
+                        )
+                        file_queued += 1
+
+            logger.info(
+                f"[SCAN-{scan_id}] Found {file_count} tagged PDFs for {user_id}"
+            )
+            record_vector_sync_scan(file_count)
+
+            # Check for deleted files (not initial sync)
+            if not initial_sync:
+                for file_id in indexed_file_ids:
+                    if file_id not in nextcloud_file_ids:
+                        file_key = (user_id, file_id)
+
+                        if file_key in _potentially_deleted:
+                            # Check if grace period elapsed
+                            first_missing_time = _potentially_deleted[file_key]
+                            time_missing = current_time - first_missing_time
+
+                            if time_missing >= grace_period:
+                                # Grace period elapsed, send for deletion
+                                logger.info(
+                                    f"File ID {file_id} missing for {time_missing:.1f}s "
+                                    f"(>{grace_period:.1f}s grace period), sending deletion"
+                                )
+                                await send_stream.send(
+                                    DocumentTask(
+                                        user_id=user_id,
+                                        doc_id=file_id,  # Use numeric file ID
+                                        doc_type="file",
+                                        operation="delete",
+                                        modified_at=0,
+                                    )
+                                )
+                                file_queued += 1
+                                del _potentially_deleted[file_key]
+                        else:
+                            # First time missing, add to grace period tracking
+                            logger.debug(
+                                f"File ID {file_id} missing for first time, starting grace period"
+                            )
+                            _potentially_deleted[file_key] = current_time
+
+        except Exception as e:
+            logger.warning(f"Failed to scan tagged files for {user_id}: {e}")
+
+        queued += file_queued
+
        if queued > 0:
-            logger.info(f"Sent {queued} documents for incremental sync: {user_id}")
+            logger.info(
+                f"Sent {queued} documents ({file_queued} files) for incremental sync: {user_id}"
+            )
        else:
            logger.debug(f"No changes detected for {user_id}")
@@ -1,6 +1,6 @@
 [project]
 name = "nextcloud-mcp-server"
-version = "0.33.1"
+version = "0.48.2"
 description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
 authors = [
    {name = "Chris Coutinho", email = "chris@coutinho.io"}
@@ -10,9 +10,9 @@ license = {text = "AGPL-3.0-only"}
 requires-python = ">=3.11"
 keywords = ["nextcloud", "mcp", "model-context-protocol", "llm", "ai", "claude", "webdav", "caldav", "carddav"]
 dependencies = [
-    "mcp[cli] (>=1.21,<1.22)",
+    "mcp[cli] (>=1.22,<1.23)",
    "httpx (>=0.28.1,<0.29.0)",
-    "pillow (>=12.0.0,<12.1.0)",
+    "pillow (>=10.3.0,<12.0.0)", # Compatible with fastembed
    "icalendar (>=6.0.0,<7.0.0)",
    "pythonvcard4>=0.2.0",
    "pydantic>=2.11.4",
@@ -22,6 +22,9 @@ dependencies = [
    "aiosqlite>=0.20.0", # Async SQLite for refresh token storage
    "authlib>=1.6.5",
    "qdrant-client>=1.7.0",
+    "fastembed>=0.7.3", # BM25 sparse vector embeddings for hybrid search
+    "anthropic>=0.42.0", # For RAG evaluation with Anthropic LLMs
+    "boto3>=1.35.0", # For Amazon Bedrock provider (optional)
    # Observability dependencies
    "prometheus-client>=0.21.0", # Prometheus metrics
    "opentelemetry-api>=1.28.2", # OpenTelemetry API
@@ -31,6 +34,12 @@ dependencies = [
    "opentelemetry-instrumentation-logging>=0.49b2", # Logging integration
    "opentelemetry-exporter-otlp-proto-grpc>=1.28.2", # OTLP gRPC exporter
    "python-json-logger>=3.2.0", # Structured JSON logging
+    "jinja2>=3.1.6",
+    "langchain-text-splitters>=1.0.0",
+    "pymupdf>=1.26.6",
+    "pymupdf4llm>=0.2.2",
+    "pymupdf-layout>=1.26.6",
+    "openai>=2.8.1",
 ]
 classifiers = [
    "Development Status :: 4 - Beta",
@@ -103,6 +112,7 @@ module-root = ""
 [dependency-groups]
 dev = [
    "commitizen>=4.8.2",
+    "datasets>=3.3.0", # For BeIR nfcorpus dataset loading
    "ipython>=9.2.0",
    "playwright>=1.49.1",
    "pytest>=8.3.5",
@@ -117,6 +127,7 @@ dev = [

 [project.scripts]
 nextcloud-mcp-server = "nextcloud_mcp_server.cli:run"
+smithery-main = "nextcloud_mcp_server.smithery_main:main"

 [[tool.uv.index]]
 name = "testpypi"
@@ -0,0 +1,38 @@
+# Smithery configuration for Nextcloud MCP Server
+# See: https://smithery.ai/docs/build/configuration
+# ADR-016: Stateless deployment mode for multi-user public Nextcloud instances
+
+# The server is shipped as a container image (built from the Dockerfile below).
+runtime: "container"
+
+# Image build inputs; paths are presumably resolved relative to this file.
+build:
+  dockerfile: "Dockerfile.smithery"
+  dockerBuildPath: "."
+
+startCommand:
+  type: "http"
+  # JSON Schema describing the connection settings; all three fields are required.
+  configSchema:
+    type: "object"
+    required:
+      - "nextcloud_url"
+      - "username"
+      - "app_password"
+    properties:
+      nextcloud_url:
+        type: "string"
+        title: "Nextcloud URL"
+        description: "Your Nextcloud instance URL (e.g., https://cloud.example.com). Must be publicly accessible."
+        # NOTE(review): the pattern also accepts plain http:// URLs; confirm that
+        # allowing credentials to be used against non-TLS instances is intended.
+        pattern: "^https?://.+"
+      username:
+        type: "string"
+        title: "Username"
+        description: "Your Nextcloud username"
+        minLength: 1
+      app_password:
+        type: "string"
+        title: "App Password"
+        description: "Nextcloud app password. Generate at Settings > Security > App passwords. Do NOT use your main password."
+        minLength: 1
+  # Placeholder values illustrating the expected shape; not real credentials.
+  exampleConfig:
+    nextcloud_url: "https://cloud.example.com"
+    username: "alice"
+    app_password: "xxxxx-xxxxx-xxxxx-xxxxx-xxxxx"
@@ -113,6 +113,7 @@ async def create_mcp_client_session(
    token: str | None = None,
    client_name: str = "MCP",
    elicitation_callback: Any = None,
+    sampling_callback: Any = None,
 ) -> AsyncGenerator[ClientSession, Any]:
    """
    Factory function to create an MCP client session with proper lifecycle management.
@@ -132,6 +133,8 @@ async def create_mcp_client_session(
        client_name: Client name for logging (e.g., "OAuth MCP (Playwright)")
        elicitation_callback: Optional callback for handling elicitation requests.
            Should match signature: async def callback(context: RequestContext, params: ElicitRequestParams) -> ElicitResult | ErrorData
+        sampling_callback: Optional callback for handling sampling (LLM generation) requests.
+            Should match signature: async def callback(context: RequestContext, params: CreateMessageRequestParams) -> CreateMessageResult | ErrorData

    Yields:
        Initialized MCP ClientSession
@@ -155,7 +158,10 @@ async def create_mcp_client_session(
        _,
    ):
        async with ClientSession(
-            read_stream, write_stream, elicitation_callback=elicitation_callback
+            read_stream,
+            write_stream,
+            elicitation_callback=elicitation_callback,
+            sampling_callback=sampling_callback,
        ) as session:
            await session.initialize()
            logger.info(f"{client_name} client session initialized successfully")
@@ -208,7 +214,8 @@ async def nc_mcp_client(anyio_backend) -> AsyncGenerator[ClientSession, Any]:
    Uses anyio pytest plugin for proper async fixture handling.
    """
    async for session in create_mcp_client_session(
-        url="http://localhost:8000/mcp", client_name="Basic MCP"
+        url="http://localhost:8000/mcp",
+        client_name="Basic MCP (HTTP)",
    ):
        yield session

@@ -0,0 +1,37 @@
+[
+  {
+    "id": "nc-manual-001",
+    "query": "What is two-factor authentication and how does it protect my Nextcloud account?",
+    "ground_truth": "Two-factor authentication (2FA) protects your Nextcloud account by requiring two different proofs of identity - something you know (like a password) and something you have (like a code from your phone). The first factor is typically a password, and the second can be a text message or code generated on your phone.",
+    "expected_topics": ["two-factor authentication", "2FA", "password", "security"],
+    "difficulty": "easy"
+  },
+  {
+    "id": "nc-manual-002",
+    "query": "How do file quotas work in Nextcloud when sharing files?",
+    "ground_truth": "When you share files with other users, the shared files count against the original share owner's quota. When you share a folder and allow others to upload files, all uploaded and edited files count against your quota. Re-shared files still count against the original share owner's quota. Deleted files in trash don't count against quotas until trash exceeds 50% of quota.",
+    "expected_topics": ["quota", "sharing", "files", "storage"],
+    "difficulty": "medium"
+  },
+  {
+    "id": "nc-manual-003",
+    "query": "How do I install the Nextcloud desktop sync client on Linux?",
+    "ground_truth": "Linux users must follow instructions on the download page to add the appropriate repository for their Linux distribution, install the signing key, and use their package managers to install the desktop sync client. Linux users also need a password manager enabled, such as GNOME Keyring or KWallet, so the sync client can login automatically.",
+    "expected_topics": ["Linux", "desktop client", "installation", "package manager", "GNOME Keyring", "KWallet"],
+    "difficulty": "medium"
+  },
+  {
+    "id": "nc-manual-004",
+    "query": "What are the system requirements for the Nextcloud desktop client on Windows?",
+    "ground_truth": "The Nextcloud desktop sync client requires Windows 10 or later, 64-bits only.",
+    "expected_topics": ["Windows", "system requirements", "desktop client"],
+    "difficulty": "easy"
+  },
+  {
+    "id": "nc-manual-005",
+    "query": "How do I use client applications with two-factor authentication enabled?",
+    "ground_truth": "Once you have enabled 2FA, your clients will no longer be able to connect with just your password unless they also support two-factor authentication. To solve this, you should generate device-specific passwords for them. This is managed through the connected browsers and devices settings.",
+    "expected_topics": ["2FA", "client applications", "device-specific passwords", "app passwords"],
+    "difficulty": "medium"
+  }
+]
@@ -0,0 +1,94 @@
+"""MCP sampling support for integration tests.
+
+This module provides utilities to enable real LLM-based sampling in integration tests
+using OpenAI or GitHub Models API.
+"""
+
+import logging
+from typing import Any
+
+from mcp import types
+from mcp.client.session import ClientSession, RequestContext
+
+from nextcloud_mcp_server.providers.openai import OpenAIProvider
+
+logger = logging.getLogger(__name__)
+
+
+def create_openai_sampling_callback(provider: OpenAIProvider):
+    """Factory to create a sampling callback using OpenAI provider.
+
+    The callback conforms to MCP's SamplingFnT protocol and can be passed
+    to ClientSession for handling sampling requests from the server.
+
+    The conversation is flattened into a single text prompt: each message
+    with text content becomes a ``"User: ..."`` / ``"Assistant: ..."``
+    paragraph, and the optional system prompt is prepended as
+    ``"System: ..."``. Messages without a ``text`` attribute on their content
+    cannot be represented in that prompt; they are skipped and a warning is
+    logged so the omission is visible in test output.
+
+    Args:
+        provider: OpenAIProvider instance configured with a generation model
+
+    Returns:
+        Async callback function for MCP sampling. It returns a
+        ``CreateMessageResult`` on success and an ``ErrorData`` with
+        ``INTERNAL_ERROR`` if the provider raises.
+
+    Example:
+        ```python
+        provider = OpenAIProvider(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            base_url=os.getenv("OPENAI_BASE_URL"),
+            generation_model="gpt-4o-mini",
+        )
+        callback = create_openai_sampling_callback(provider)
+
+        async for session in create_mcp_client_session(
+            url="http://localhost:8000/mcp",
+            sampling_callback=callback,
+        ):
+            # Session now supports sampling
+            pass
+        ```
+    """
+
+    async def sampling_callback(
+        context: RequestContext[ClientSession, Any],
+        params: types.CreateMessageRequestParams,
+    ) -> types.CreateMessageResult | types.ErrorData:
+        """Handle sampling requests using OpenAI provider."""
+        logger.debug(f"Sampling callback invoked with {len(params.messages)} messages")
+
+        # Extract messages and build prompt
+        messages_text = []
+        skipped = 0
+        for msg in params.messages:
+            if hasattr(msg.content, "text"):
+                role_prefix = "User" if msg.role == "user" else "Assistant"
+                messages_text.append(f"{role_prefix}: {msg.content.text}")
+            else:
+                skipped += 1
+
+        # Surface dropped content instead of silently sending a partial prompt.
+        if skipped:
+            logger.warning(
+                f"Ignoring {skipped} sampling message(s) without text content"
+            )
+
+        prompt = "\n\n".join(messages_text)
+
+        # Add system prompt if provided
+        if params.systemPrompt:
+            prompt = f"System: {params.systemPrompt}\n\n{prompt}"
+
+        logger.debug(f"Generating response for prompt ({len(prompt)} chars)")
+
+        try:
+            # Generate response using OpenAI provider
+            # Note: temperature is hardcoded in the provider at 0.7
+            response = await provider.generate(
+                prompt=prompt,
+                max_tokens=params.maxTokens,
+            )
+
+            model_name = provider.generation_model or "unknown"
+            logger.info(f"Sampling completed: {len(response)} chars from {model_name}")
+
+            return types.CreateMessageResult(
+                role="assistant",
+                content=types.TextContent(type="text", text=response),
+                model=model_name,
+                stopReason="endTurn",
+            )
+        except Exception as e:
+            # logger.exception keeps the traceback, which logger.error dropped.
+            logger.exception(f"OpenAI generation failed: {e}")
+            return types.ErrorData(
+                code=types.INTERNAL_ERROR,
+                message=f"OpenAI generation failed: {e!s}",
+            )
+
+    return sampling_callback
@@ -0,0 +1,361 @@
+"""Integration tests for PDF document indexing and semantic search.
+
+These tests validate the complete PDF processing flow:
+1. Process PDF with PyMuPDFProcessor
+2. Chunk extracted text with page numbers
+3. Index chunks into Qdrant with metadata
+4. Perform semantic search on PDF content
+5. Verify page numbers and metadata are preserved
+"""
+
+import pymupdf
+import pytest
+from qdrant_client import AsyncQdrantClient
+from qdrant_client.models import Distance, PointStruct, VectorParams
+
+from nextcloud_mcp_server.document_processors.pymupdf import PyMuPDFProcessor
+from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
+from nextcloud_mcp_server.vector.document_chunker import (
+    ChunkWithPosition,
+    RecursiveCharacterTextSplitter,
+)
+
+pytestmark = pytest.mark.integration
+
+
+def create_test_pdf() -> bytes:
+    """Create a small three-page test PDF and return it as raw bytes.
+
+    Each page is A4-sized (595 x 842 points) and carries one chapter of a
+    fictional administration guide (introduction, installation,
+    configuration), inserted at position (50, 50).
+
+    Returns:
+        The serialized PDF document.
+    """
+    page_texts = (
+        # Page 1: Introduction
+        "Nextcloud Administration Guide\n\n"
+        "Chapter 1: Introduction\n\n"
+        "Nextcloud is a self-hosted file sharing and collaboration platform. "
+        "It provides secure file storage, sharing, and synchronization across devices. "
+        "This guide covers installation, configuration, and maintenance of Nextcloud.",
+        # Page 2: Installation
+        "Chapter 2: Installation\n\n"
+        "System Requirements:\n"
+        "- PHP 8.0 or higher\n"
+        "- MySQL 8.0 or MariaDB 10.5\n"
+        "- Apache or Nginx web server\n\n"
+        "Installation steps:\n"
+        "1. Download Nextcloud package\n"
+        "2. Extract to web server directory\n"
+        "3. Configure database connection\n"
+        "4. Run installation wizard",
+        # Page 3: Configuration
+        "Chapter 3: Configuration\n\n"
+        "Database Configuration:\n"
+        "Edit config/config.php to set database parameters. "
+        "Configure database host, username, password, and database name. "
+        "For optimal performance, use MySQL or MariaDB.\n\n"
+        "Security Settings:\n"
+        "Enable HTTPS, configure trusted domains, and set up firewall rules.",
+    )
+
+    doc = pymupdf.open()
+    try:
+        for text in page_texts:
+            page = doc.new_page(width=595, height=842)  # A4 size
+            page.insert_text((50, 50), text)
+
+        # Convert to bytes
+        return doc.tobytes()
+    finally:
+        # Always release the document, even if page creation or serialization fails.
+        doc.close()
+
+
+@pytest.fixture
+async def simple_embedding_provider():
+    """Simple in-process embedding provider for testing."""
+    return SimpleEmbeddingProvider(dimension=384)
+
+
+@pytest.fixture
+async def qdrant_test_client():
+    """Qdrant client for testing (in-memory)."""
+    client = AsyncQdrantClient(":memory:")
+    yield client
+    await client.close()
+
+
+@pytest.fixture
+async def test_collection(qdrant_test_client: AsyncQdrantClient):
+    """Create test collection in Qdrant."""
+    collection_name = "test_pdf_indexing"
+
+    # Create collection
+    await qdrant_test_client.create_collection(
+        collection_name=collection_name,
+        vectors_config=VectorParams(size=384, distance=Distance.COSINE),
+    )
+
+    yield collection_name
+
+    # Cleanup
+    try:
+        await qdrant_test_client.delete_collection(collection_name)
+    except Exception:
+        pass
+
+
+@pytest.fixture
+def pymupdf_processor():
+    """PyMuPDF processor for testing (without image extraction)."""
+    return PyMuPDFProcessor(extract_images=False)
+
+
+async def test_pymupdf_processor_extracts_text_and_metadata(pymupdf_processor):
+    """Test PyMuPDF processor extracts text and metadata from PDF."""
+    pdf_bytes = create_test_pdf()
+
+    result = await pymupdf_processor.process(
+        content=pdf_bytes,
+        content_type="application/pdf",
+        filename="test-admin-guide.pdf",
+    )
+
+    # Verify result structure
+    assert result.success is True
+    assert result.processor == "pymupdf"
+    assert result.text is not None
+    assert len(result.text) > 0
+
+    # Verify extracted text contains expected content
+    assert "Nextcloud Administration Guide" in result.text
+    assert "Chapter 1: Introduction" in result.text
+    assert "Chapter 2: Installation" in result.text
+    assert "Chapter 3: Configuration" in result.text
+    assert "PHP 8.0 or higher" in result.text
+    assert "MySQL" in result.text
+
+    # Verify metadata
+    assert result.metadata is not None
+    assert result.metadata["page_count"] == 3
+    assert result.metadata["filename"] == "test-admin-guide.pdf"
+    assert "format" in result.metadata
+
+
+async def test_document_chunker_preserves_page_numbers():
+    """Test that document chunker can handle chunks with page number metadata."""
+    # Create chunks with page numbers
+    chunks = [
+        ChunkWithPosition(
+            text="Chapter 1 content on page 1",
+            start_offset=0,
+            end_offset=28,
+            page_number=1,
+        ),
+        ChunkWithPosition(
+            text="Chapter 2 content on page 2",
+            start_offset=29,
+            end_offset=57,
+            page_number=2,
+        ),
+        ChunkWithPosition(
+            text="Chapter 3 content on page 3",
+            start_offset=58,
+            end_offset=86,
+            page_number=3,
+        ),
+    ]
+
+    # Verify page numbers are preserved
+    assert chunks[0].page_number == 1
+    assert chunks[1].page_number == 2
+    assert chunks[2].page_number == 3
+
+
+async def test_pdf_indexing_and_search_flow(
+    pymupdf_processor: PyMuPDFProcessor,
+    qdrant_test_client: AsyncQdrantClient,
+    test_collection: str,
+    simple_embedding_provider: SimpleEmbeddingProvider,
+):
+    """Test complete PDF indexing and semantic search flow."""
+
+    # Step 1: Process PDF with PyMuPDF
+    pdf_bytes = create_test_pdf()
+    result = await pymupdf_processor.process(
+        content=pdf_bytes,
+        content_type="application/pdf",
+        filename="/Documents/admin-guide.pdf",
+    )
+
+    assert result.success is True
+    assert result.metadata["page_count"] == 3
+
+    # Step 2: Chunk the extracted text
+    # Note: In real implementation, we'd track which chunk came from which page
+    # For this test, we'll simulate by creating chunks manually
+    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    chunks = splitter.split_text(result.text)
+
+    assert len(chunks) > 0
+
+    # Step 3: Index chunks into Qdrant with PDF metadata
+    points = []
+    for idx, chunk_text in enumerate(chunks):
+        embedding = await simple_embedding_provider.embed(chunk_text)
+
+        # Simulate page number assignment (in real implementation, this would be tracked)
+        # For simplicity, assign page based on content
+        page_number = 1
+        if "Chapter 2" in chunk_text or "Installation" in chunk_text:
+            page_number = 2
+        elif "Chapter 3" in chunk_text or "Configuration" in chunk_text:
+            page_number = 3
+
+        points.append(
+            PointStruct(
+                id=idx,
+                vector=embedding,
+                payload={
+                    "user_id": "admin",
+                    "doc_id": "/Documents/admin-guide.pdf",
+                    "doc_type": "file",
+                    "title": "Nextcloud Administration Guide",
+                    "file_path": "/Documents/admin-guide.pdf",
+                    "mime_type": "application/pdf",
+                    "page_number": page_number,
+                    "page_count": result.metadata["page_count"],
+                    "chunk_index": idx,
+                    "excerpt": chunk_text[:200],
+                },
+            )
+        )
+
+    await qdrant_test_client.upsert(
+        collection_name=test_collection, points=points, wait=True
+    )
+
+    # Step 4: Perform semantic search for installation instructions
+    query = "how to install Nextcloud system requirements"
+    query_embedding = await simple_embedding_provider.embed(query)
+
+    response = await qdrant_test_client.query_points(
+        collection_name=test_collection,
+        query=query_embedding,
+        limit=3,
+        score_threshold=0.0,
+    )
+
+    # Verify search results
+    assert len(response.points) > 0
+
+    # Top result should be from installation chapter (page 2)
+    top_result = response.points[0]
+    assert top_result.payload["doc_type"] == "file"
+    assert top_result.payload["file_path"] == "/Documents/admin-guide.pdf"
+    assert (
+        "Installation" in top_result.payload["excerpt"]
+        or top_result.payload["page_number"] == 2
+    )
+
+    # Verify page number is preserved
+    assert top_result.payload["page_number"] in [1, 2, 3]
+    assert top_result.payload["page_count"] == 3
+
+    # Step 5: Search for configuration
+    query = "database configuration settings MySQL"
+    query_embedding = await simple_embedding_provider.embed(query)
+
+    response = await qdrant_test_client.query_points(
+        collection_name=test_collection,
+        query=query_embedding,
+        limit=3,
+        score_threshold=0.0,
+    )
+
+    assert len(response.points) > 0
+
+    # Should find configuration chapter (page 3)
+    found_config = any(
+        "Configuration" in r.payload["excerpt"] or r.payload["page_number"] == 3
+        for r in response.points[:2]
+    )
+    assert found_config
+
+
+async def test_pdf_search_with_filters(
+    pymupdf_processor: PyMuPDFProcessor,
+    qdrant_test_client: AsyncQdrantClient,
+    test_collection: str,
+    simple_embedding_provider: SimpleEmbeddingProvider,
+):
+    """Test PDF search with metadata filters.
+
+    Indexes the chunks of the generated test PDF with ``doc_type="file"`` and
+    ``mime_type="application/pdf"`` payloads, then verifies that a query
+    filtered on ``doc_type == "file"`` only returns points carrying that
+    metadata.
+    """
+    from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+    # Process and index PDF
+    pdf_bytes = create_test_pdf()
+    result = await pymupdf_processor.process(
+        content=pdf_bytes,
+        content_type="application/pdf",
+        filename="/Documents/admin-guide.pdf",
+    )
+    # Fail early with a clear signal if extraction itself is broken.
+    assert result.success is True
+
+    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    chunks = splitter.split_text(result.text)
+    assert len(chunks) > 0
+
+    # Index with metadata
+    points = []
+    for idx, chunk_text in enumerate(chunks):
+        embedding = await simple_embedding_provider.embed(chunk_text)
+
+        points.append(
+            PointStruct(
+                id=idx,
+                vector=embedding,
+                payload={
+                    "user_id": "admin",
+                    "doc_id": "/Documents/admin-guide.pdf",
+                    "doc_type": "file",
+                    "mime_type": "application/pdf",
+                    "excerpt": chunk_text[:200],
+                },
+            )
+        )
+
+    await qdrant_test_client.upsert(
+        collection_name=test_collection, points=points, wait=True
+    )
+
+    # Search with filter for PDFs only
+    query = "Nextcloud installation"
+    query_embedding = await simple_embedding_provider.embed(query)
+
+    response = await qdrant_test_client.query_points(
+        collection_name=test_collection,
+        query=query_embedding,
+        query_filter=Filter(
+            must=[FieldCondition(key="doc_type", match=MatchValue(value="file"))]
+        ),
+        limit=3,
+    )
+
+    # All results should be from file documents
+    assert len(response.points) > 0
+    # Named `point` so the processing `result` above is not shadowed.
+    for point in response.points:
+        assert point.payload["doc_type"] == "file"
+        assert point.payload["mime_type"] == "application/pdf"
+
+
+async def test_pymupdf_health_check(pymupdf_processor: PyMuPDFProcessor):
+    """Test PyMuPDF processor health check."""
+    is_healthy = await pymupdf_processor.health_check()
+    assert is_healthy is True
+
+
+async def test_pymupdf_supports_pdf_mime_type(pymupdf_processor: PyMuPDFProcessor):
+    """Test PyMuPDF processor declares PDF support."""
+    assert "application/pdf" in pymupdf_processor.supported_mime_types
+    assert pymupdf_processor.name == "pymupdf"
@@ -0,0 +1,423 @@
+"""Integration tests for RAG pipeline with OpenAI/GitHub Models API.
+
+These tests validate the complete semantic search and MCP sampling flow using:
+1. OpenAI embeddings for semantic search
+2. MCP sampling for answer generation
+3. Pre-indexed Nextcloud User Manual as the knowledge base
+
+Environment Variables:
+    OPENAI_API_KEY: OpenAI API key or GitHub token for models.github.ai
+    OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
+    OPENAI_EMBEDDING_MODEL: Embedding model (default: "text-embedding-3-small")
+    OPENAI_GENERATION_MODEL: Generation model for sampling (default: "gpt-4o-mini")
+    RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: "Nextcloud Manual.pdf")
+
+For GitHub CI, set:
+    OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+    OPENAI_BASE_URL: https://models.github.ai/inference
+    OPENAI_EMBEDDING_MODEL: openai/text-embedding-3-small
+    OPENAI_GENERATION_MODEL: openai/gpt-4o-mini
+
+Prerequisites:
+    - Nextcloud User Manual PDF uploaded to Nextcloud
+    - VECTOR_SYNC_ENABLED=true on the MCP server
+"""
+
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Any, AsyncGenerator
+
+import anyio
+import pytest
+from mcp import ClientSession
+
+from nextcloud_mcp_server.providers.openai import OpenAIProvider
+from tests.conftest import create_mcp_client_session
+from tests.integration.sampling_support import create_openai_sampling_callback
+
+logger = logging.getLogger(__name__)
+
+# Default path to the Nextcloud User Manual PDF
+DEFAULT_MANUAL_PATH = "Nextcloud Manual.pdf"
+
+
+async def llm_judge(
+    provider: "OpenAIProvider",
+    ground_truth: str,
+    system_output: str,
+) -> bool:
+    """Use LLM to judge if system output aligns with ground truth.
+
+    The model is asked to answer TRUE or FALSE. The verdict is the first of
+    those two words to appear in the reply (case-insensitive), so a reply
+    such as "FALSE - not true" is judged negative rather than matching on
+    the later "true". A reply containing neither word is treated as False.
+
+    Args:
+        provider: OpenAI provider with generation capability
+        ground_truth: The expected/reference answer
+        system_output: The system's actual output to evaluate
+
+    Returns:
+        True if output aligns with ground truth, False otherwise
+    """
+    prompt = f"""GROUND TRUTH: {ground_truth}
+
+SYSTEM OUTPUT: {system_output}
+
+Does the system output contain the key facts from the ground truth?
+
+Answer: TRUE or FALSE"""
+
+    response = await provider.generate(prompt, max_tokens=10)
+
+    verdict = (response or "").upper()
+    true_at = verdict.find("TRUE")
+    false_at = verdict.find("FALSE")
+
+    if true_at == -1:
+        return False
+    # TRUE counts only if the model did not say FALSE first.
+    return false_at == -1 or true_at < false_at
+
+
+# Skip all tests if OpenAI API key not configured
+pytestmark = [
+    pytest.mark.integration,
+    pytest.mark.skipif(
+        not os.getenv("OPENAI_API_KEY"),
+        reason="OPENAI_API_KEY not set - skipping OpenAI RAG tests",
+    ),
+]
+
+# Ground truth fixture path
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+GROUND_TRUTH_FILE = FIXTURES_DIR / "nextcloud_manual_ground_truth.json"
+
+
+@pytest.fixture(scope="module")
+def ground_truth_qa():
+    """Load ground truth Q&A pairs for the Nextcloud manual.
+
+    Returns the parsed contents of ``GROUND_TRUTH_FILE``. The dependent tests
+    are skipped if the fixture file is missing.
+    """
+    if not GROUND_TRUTH_FILE.exists():
+        pytest.skip(f"Ground truth file not found: {GROUND_TRUTH_FILE}")
+
+    # Read as UTF-8 explicitly; the platform default encoding may differ.
+    with open(GROUND_TRUTH_FILE, encoding="utf-8") as f:
+        return json.load(f)
+
+
+@pytest.fixture(scope="module")
+async def indexed_manual_pdf(nc_client, nc_mcp_client):
+    """Ensure the Nextcloud User Manual PDF is tagged and indexed for vector search.
+
+    This fixture:
+    1. Gets file info for the manual PDF
+    2. Creates/gets the 'vector-index' tag
+    3. Assigns the tag to the file
+    4. Waits for vector sync to complete indexing
+
+    Environment Variables:
+        RAG_MANUAL_PATH: Path to manual PDF in Nextcloud (default: Nextcloud Manual.pdf)
+    """
+    manual_path = os.getenv("RAG_MANUAL_PATH", DEFAULT_MANUAL_PATH)
+
+    logger.info(f"Setting up indexed manual PDF: {manual_path}")
+
+    # Get file info to verify file exists and get file ID
+    file_info = await nc_client.webdav.get_file_info(manual_path)
+    if not file_info:
+        pytest.skip(f"Manual PDF not found at '{manual_path}'")
+
+    file_id = file_info["id"]
+    logger.info(f"Found manual PDF: {manual_path} (file_id={file_id})")
+
+    # Create or get the vector-index tag
+    tag = await nc_client.webdav.get_or_create_tag("vector-index")
+    tag_id = tag["id"]
+    logger.info(f"Using tag 'vector-index' (tag_id={tag_id})")
+
+    # Assign tag to file
+    await nc_client.webdav.assign_tag_to_file(file_id, tag_id)
+    logger.info(f"Tagged file {file_id} with vector-index tag")
+
+    # Wait for vector sync to complete indexing
+    max_attempts = 60
+    poll_interval = 10
+
+    logger.info("Waiting for vector sync to index the manual...")
+
+    for attempt in range(1, max_attempts + 1):
+        try:
+            # Call the MCP tool via the existing client session
+            result = await nc_mcp_client.call_tool(
+                "nc_get_vector_sync_status",
+                arguments={},
+            )
+
+            if not result.isError:
+                content = result.structuredContent or {}
+                indexed = content.get("indexed_count", 0)
+                pending = content.get("pending_count", 1)
+
+                logger.info(
+                    f"Attempt {attempt}/{max_attempts}: "
+                    f"indexed={indexed}, pending={pending}"
+                )
+
+                if indexed > 0 and pending == 0:
+                    logger.info(
+                        f"Vector indexing complete: {indexed} documents indexed"
+                    )
+                    break
+        except Exception as e:
+            logger.warning(f"Attempt {attempt}: Error checking status: {e}")
+
+        if attempt < max_attempts:
+            await anyio.sleep(poll_interval)
+    else:
+        logger.warning(
+            f"Vector indexing may not be complete after {max_attempts} attempts"
+        )
+
+    yield {
+        "path": manual_path,
+        "file_id": file_id,
+        "tag_id": tag_id,
+    }
+
+
+@pytest.fixture(scope="module")
+async def openai_provider():
+    """OpenAI provider configured from environment (embeddings only).
+
+    Reads OPENAI_API_KEY, OPENAI_BASE_URL and OPENAI_EMBEDDING_MODEL
+    (default: "text-embedding-3-small"). When the base URL points at the
+    GitHub Models API and the model name carries no publisher prefix, the
+    "openai/" prefix is added, mirroring the generation provider fixture so
+    the unprefixed default also works there.
+
+    Yields:
+        An OpenAIProvider with no generation model; it is closed on teardown.
+    """
+    api_key = os.getenv("OPENAI_API_KEY")
+    base_url = os.getenv("OPENAI_BASE_URL")
+    embedding_model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small")
+
+    # For GitHub Models API, use the prefixed model name. Names that already
+    # contain a publisher ("publisher/model") are left untouched.
+    if base_url and "models.github.ai" in base_url:
+        if "/" not in embedding_model:
+            embedding_model = f"openai/{embedding_model}"
+
+    provider = OpenAIProvider(
+        api_key=api_key,
+        base_url=base_url,
+        embedding_model=embedding_model,
+        generation_model=None,  # Embeddings only
+    )
+
+    yield provider
+    await provider.close()
+
+
+@pytest.fixture(scope="module")
+async def openai_generation_provider():
+    """OpenAI provider configured for text generation (for sampling callback)."""
+    api_key = os.getenv("OPENAI_API_KEY")
+    base_url = os.getenv("OPENAI_BASE_URL")
+    generation_model = os.getenv("OPENAI_GENERATION_MODEL", "gpt-4o-mini")
+
+    # For GitHub Models API, use the prefixed model name
+    if base_url and "models.github.ai" in base_url:
+        if not generation_model.startswith("openai/"):
+            generation_model = f"openai/{generation_model}"
+
+    provider = OpenAIProvider(
+        api_key=api_key,
+        base_url=base_url,
+        embedding_model=None,  # Generation only
+        generation_model=generation_model,
+    )
+
+    yield provider
+    await provider.close()
+
+
+@pytest.fixture(scope="module")
+async def nc_mcp_client_with_sampling(
+    anyio_backend, openai_generation_provider
+) -> AsyncGenerator[ClientSession, Any]:
+    """MCP client with OpenAI-based sampling support.
+
+    This fixture creates an MCP client that can handle sampling requests
+    from the server using OpenAI for text generation.
+    """
+    sampling_callback = create_openai_sampling_callback(openai_generation_provider)
+
+    async for session in create_mcp_client_session(
+        url="http://localhost:8000/mcp",
+        client_name="OpenAI Sampling MCP",
+        sampling_callback=sampling_callback,
+    ):
+        yield session
+
+
+async def test_openai_embeddings_work(openai_provider: OpenAIProvider):
+    """Test that OpenAI embeddings can be generated."""
+    embedding = await openai_provider.embed("test query about Nextcloud")
+
+    assert isinstance(embedding, list)
+    assert len(embedding) > 0
+    assert all(isinstance(x, float) for x in embedding)
+    # OpenAI embedding dimensions: 1536 (small) or 3072 (large)
+    assert len(embedding) in [1536, 3072]
+
+
+async def test_semantic_search_retrieval(
+    nc_mcp_client, ground_truth_qa, indexed_manual_pdf, openai_generation_provider
+):
+    """Test that semantic search retrieves relevant documents from the manual.
+
+    This tests the retrieval component of RAG - ensuring that queries
+    return relevant chunks from the indexed Nextcloud User Manual.
+    """
+    # Use first query from ground truth
+    test_case = ground_truth_qa[0]  # 2FA question
+    query = test_case["query"]
+
+    # Perform semantic search via MCP tool
+    result = await nc_mcp_client.call_tool(
+        "nc_semantic_search",
+        arguments={
+            "query": query,
+            "limit": 5,
+            "score_threshold": 0.0,
+        },
+    )
+
+    assert result.isError is False, f"Tool call failed: {result}"
+    data = result.structuredContent
+
+    # Verify we got results
+    assert data["success"] is True
+    assert data["total_found"] > 0, f"No results for query: {query}"
+    assert len(data["results"]) > 0
+
+    # Use LLM judge to evaluate if excerpts are relevant to ground truth
+    all_excerpts = " ".join([r["excerpt"] for r in data["results"]])
+    is_relevant = await llm_judge(
+        openai_generation_provider,
+        test_case["ground_truth"],
+        all_excerpts,
+    )
+    assert is_relevant, f"LLM judge: excerpts not relevant to query: {query}"
+
+
+async def test_semantic_search_answer_with_sampling(
+    nc_mcp_client_with_sampling,
+    ground_truth_qa,
+    indexed_manual_pdf,
+    openai_generation_provider,
+):
+    """Test semantic search with MCP sampling for answer generation.
+
+    This tests the full RAG pipeline:
+    1. Semantic search retrieves relevant documents
+    2. MCP sampling generates an answer from the retrieved context
+    3. OpenAI generates the answer via the sampling callback
+
+    Uses nc_mcp_client_with_sampling which has OpenAI-based sampling enabled.
+    """
+    # Use the 2FA question - has clear expected answer
+    test_case = ground_truth_qa[0]
+    query = test_case["query"]
+
+    result = await nc_mcp_client_with_sampling.call_tool(
+        "nc_semantic_search_answer",
+        arguments={
+            "query": query,
+            "limit": 5,
+            "score_threshold": 0.0,
+            "max_answer_tokens": 300,
+        },
+    )
+
+    assert result.isError is False, f"Tool call failed: {result}"
+    data = result.structuredContent
+
+    # Verify response structure
+    assert data["success"] is True
+    assert "query" in data
+    assert "generated_answer" in data
+    assert "sources" in data
+    assert "search_method" in data
+
+    # Check for either successful sampling or graceful fallback
+    fallback_methods = {
+        "semantic_sampling_unsupported",
+        "semantic_sampling_user_declined",
+        "semantic_sampling_timeout",
+        "semantic_sampling_mcp_error",
+        "semantic_sampling_fallback",
+    }
+
+    if data["search_method"] in fallback_methods:
+        # Fallback mode - verify sources still returned
+        assert len(data["sources"]) > 0, "Expected sources even in fallback mode"
+        pytest.skip(
+            f"MCP sampling not available (method: {data['search_method']}), "
+            f"but retrieval succeeded with {len(data['sources'])} sources"
+        )
+    else:
+        # Successful sampling - verify answer quality
+        assert data["search_method"] == "semantic_sampling"
+        assert data["generated_answer"] is not None
+        assert len(data["generated_answer"]) > 50  # Non-trivial answer
+
+        # Use LLM judge to evaluate answer relevance
+        is_relevant = await llm_judge(
+            openai_generation_provider,
+            test_case["ground_truth"],
+            data["generated_answer"],
+        )
+        assert is_relevant, f"LLM judge: answer not relevant to query: {query}"
+
+
+@pytest.mark.parametrize(
+    "qa_index,min_expected_results",
+    [
+        (0, 1),  # 2FA question
+        (1, 1),  # File quotas question
+        (2, 1),  # Linux installation question
+        (3, 1),  # Windows requirements question
+        (4, 1),  # Client apps with 2FA question
+    ],
+)
+async def test_retrieval_quality_all_queries(
+    nc_mcp_client, ground_truth_qa, indexed_manual_pdf, qa_index, min_expected_results
+):
+    """Test retrieval quality for all ground truth queries.
+
+    Validates that each query returns at least the minimum expected
+    number of relevant results from the Nextcloud manual.
+    """
+    if qa_index >= len(ground_truth_qa):
+        pytest.skip(f"Ground truth index {qa_index} not available")
+
+    test_case = ground_truth_qa[qa_index]
+    query = test_case["query"]
+
+    result = await nc_mcp_client.call_tool(
+        "nc_semantic_search",
+        arguments={
+            "query": query,
+            "limit": 5,
+            "score_threshold": 0.0,
+        },
+    )
+
+    assert result.isError is False
+    data = result.structuredContent
+
+    assert data["total_found"] >= min_expected_results, (
+        f"Query '{query}' returned {data['total_found']} results, "
+        f"expected at least {min_expected_results}"
+    )
+
+
+async def test_no_results_for_unrelated_query(nc_mcp_client, indexed_manual_pdf):
+    """Test that completely unrelated queries return low/no scores.
+
+    The Nextcloud manual shouldn't have relevant content for
+    quantum physics queries.
+    """
+    result = await nc_mcp_client.call_tool(
+        "nc_semantic_search",
+        arguments={
+            "query": "quantum entanglement hadron collider particle physics",
+            "limit": 5,
+            "score_threshold": 0.5,  # Higher threshold to filter irrelevant
+        },
+    )
+
+    assert result.isError is False
+    data = result.structuredContent
+
+    # Should have few or no high-scoring results
+    # Low score threshold means we might get some results, but they should be low quality
+    if data["total_found"] > 0:
+        # If results exist, they should have low scores
+        max_score = max(r["score"] for r in data["results"])
+        assert max_score < 0.8, f"Unexpected high score {max_score} for unrelated query"
@@ -0,0 +1,278 @@
+# RAG Evaluation Tests
+
+This directory contains tests for evaluating the Retrieval-Augmented Generation (RAG) system in the Nextcloud MCP server, specifically the `nc_semantic_search_answer` tool.
+
+## Architecture
+
+The RAG system has two components that are tested independently:
+
+1. **Retrieval** - Vector sync/embedding pipeline (indexed Nextcloud documents → vector database)
+2. **Generation** - MCP client LLM synthesis (retrieved context → natural language answer)
+
+See [ADR-013](../../docs/ADR-013-rag-evaluation.md) for full architectural details.
+
+## Test Structure
+
+```
+tests/rag_evaluation/
+├── README.md                       # This file
+├── conftest.py                     # Pytest fixtures
+├── llm_providers.py                # LLM provider abstraction (Ollama/Anthropic/Bedrock/OpenAI)
+├── fixtures/
+│   └── ground_truth.json           # Pre-generated reference answers
+├── test_retrieval_quality.py       # Retrieval evaluation (Context Recall)
+└── test_generation_quality.py      # Generation evaluation (Answer Correctness)
+```
+
+## Metrics
+
+### Retrieval Evaluation
+- **Metric**: Context Recall
+- **Method**: Heuristic - Check if ground-truth document IDs appear in top-k results
+- **Target**: ≥80% recall
+
+### Generation Evaluation
+- **Metric**: Answer Correctness
+- **Method**: LLM-as-judge - Compare RAG answer vs ground truth (binary true/false)
+- **Evaluation**: External LLM evaluates semantic equivalence
+
+## Dataset
+
+**BeIR/nfcorpus** - Medical/biomedical corpus with ~3,600 documents
+
+**Test Queries** (5 selected):
+1. PLAIN-2630: "Alkylphenol Endocrine Disruptors and Allergies" (21 relevant docs)
+2. PLAIN-2660: "How Long to Detox From Fish Before Pregnancy?" (20 relevant docs)
+3. PLAIN-2510: "Coffee and Artery Function" (16 relevant docs)
+4. PLAIN-2430: "Preventing Brain Loss with B Vitamins?" (15 relevant docs)
+5. PLAIN-2690: "Chronic Headaches and Pork Tapeworms" (14 relevant docs)
+
+## Setup
+
+### 1. Install Dependencies
+
+```bash
+uv sync --group dev
+```
+
+This installs:
+- `anthropic>=0.42.0` - For Anthropic LLM evaluation
+- `click>=8.1.8` - For CLI interface
+- `datasets>=3.3.0` - For BeIR nfcorpus dataset loading
+
+### 2. Configure LLM Provider
+
+Set environment variables for your LLM provider:
+
+**Option A: Ollama (default, local/remote)**
+```bash
+export RAG_EVAL_PROVIDER=ollama
+export OLLAMA_HOST=https://ollama.example.com  # or RAG_EVAL_OLLAMA_BASE_URL
+export RAG_EVAL_OLLAMA_MODEL=llama3.2:1b
+```
+
+**Option B: Anthropic (cloud)**
+```bash
+export RAG_EVAL_PROVIDER=anthropic
+export RAG_EVAL_ANTHROPIC_API_KEY=sk-ant-...
+export RAG_EVAL_ANTHROPIC_MODEL=claude-3-5-sonnet-20241022
+```
+
+### 3. One-Time Setup: Generate Ground Truth
+
+Generate synthetic reference answers for the 5 test queries:
+
+```bash
+uv run python tools/rag_eval_cli.py generate
+```
+
+**What this does:**
+- Downloads nfcorpus dataset to `tests/rag_evaluation/fixtures/nfcorpus/` (cached locally)
+- For each of the 5 selected queries, extracts highly relevant documents
+- Uses configured LLM to synthesize a reference answer
+- Saves to `tests/rag_evaluation/fixtures/ground_truth.json`
+
+**Optional flags:**
+- `--provider ollama|anthropic` - Override LLM provider
+- `--model MODEL_NAME` - Override model name
+- `--force-download` - Re-download nfcorpus dataset
+
+### 4. One-Time Setup: Upload Corpus to Nextcloud
+
+Upload all 3,633 nfcorpus documents as Nextcloud notes:
+
+```bash
+uv run python tools/rag_eval_cli.py upload \
+    --nextcloud-url http://localhost:8000 \
+    --username admin \
+    --password admin
+```
+
+**What this does:**
+- Downloads nfcorpus dataset (if not already cached)
+- Uploads all documents as notes in Nextcloud
+- Saves document ID → note ID mapping to `tests/rag_evaluation/fixtures/note_mapping.json`
+
+**Optional flags:**
+- `--category CATEGORY` - Custom category for notes (default: `nfcorpus_rag_eval`)
+- `--force-download` - Re-download nfcorpus dataset
+- `--force` - Delete all existing notes in the target category before uploading (efficient corpus refresh)
+
+**Important:** This step requires:
+- A running Nextcloud instance with vector sync enabled
+- Notes app installed
+- Valid credentials
+
+**Duration:** ~10-15 minutes to upload 3,633 documents
+
+## Running Tests
+
+### Run All RAG Evaluation Tests
+
+```bash
+uv run pytest tests/rag_evaluation/ -v
+```
+
+### Run Specific Test Suites
+
+**Retrieval Quality Only:**
+```bash
+uv run pytest tests/rag_evaluation/test_retrieval_quality.py -v
+```
+
+**Generation Quality Only:**
+```bash
+uv run pytest tests/rag_evaluation/test_generation_quality.py -v
+```
+
+### Run Individual Tests
+
+```bash
+uv run pytest tests/rag_evaluation/test_retrieval_quality.py::test_retrieval_context_recall -v
+uv run pytest tests/rag_evaluation/test_generation_quality.py::test_answer_correctness -v
+```
+
+## Test Execution Flow
+
+**Prerequisites** (one-time setup):
+1. Generated ground truth (`tools/rag_eval_cli.py generate`)
+2. Uploaded corpus to Nextcloud (`tools/rag_eval_cli.py upload`)
+
+### Retrieval Quality Tests
+
+1. **Setup** (`nfcorpus_test_data` fixture):
+   - Loads pre-generated ground truth from `fixtures/ground_truth.json`
+   - Loads note mapping from `fixtures/note_mapping.json`
+   - Returns test cases with expected note IDs
+
+2. **Test** (`test_retrieval_context_recall`):
+   - For each query: Perform semantic search (top-10)
+   - Extract retrieved note IDs
+   - Calculate Context Recall = |expected ∩ retrieved| / |expected| (fraction of expected notes that were retrieved)
+   - Assert recall ≥ 80%
+
+3. **Cleanup**:
+   - None required (notes persist in Nextcloud for reuse)
+
+### Generation Quality Tests
+
+1. **Setup**:
+   - Same as retrieval tests (reuses `nfcorpus_test_data` fixture)
+   - Creates evaluation LLM provider
+
+2. **Test** (`test_answer_correctness`):
+   - For each query: Call `nc_semantic_search_answer` MCP tool
+   - Extract generated answer
+   - Use LLM-as-judge to compare vs ground truth
+   - Assert semantic equivalence (TRUE/FALSE)
+
+3. **Cleanup**:
+   - LLM provider closed
+
+## Expected Test Duration
+
+**One-time setup:**
+- **Generate ground truth**: ~5-10 minutes (5 queries with LLM generation)
+- **Upload corpus**: ~10-15 minutes (3,633 documents)
+- **Total setup**: ~15-25 minutes
+
+**Test execution** (after setup):
+- **Retrieval tests**: ~1-2 minutes (5 queries, no upload/cleanup)
+- **Generation tests**: ~5-10 minutes (RAG generation + LLM evaluation)
+- **Total per run**: ~6-12 minutes
+
+**Note**: These are NOT smoke tests and are NOT run in CI.
+
+## Limitations & Future Work
+
+**Current Limitations:**
+- Only 5 test queries (limited statistical confidence)
+- Medical domain bias (may not represent production use cases)
+- Synthetic ground truth (LLM-generated, not human-validated)
+- Manual test execution (requires external LLM access)
+
+**Future Enhancements:**
+- Expand to 50-100 queries for statistical significance
+- Add custom test dataset with production-representative documents
+- Implement additional metrics (faithfulness, context relevance, answer relevance)
+- Create automated benchmarking dashboard
+- Test multi-hop reasoning (synthesis questions)
+- Evaluate out-of-scope handling ("I don't know" responses)
+
+## Troubleshooting
+
+### Tests Fail with "Ground truth file not found"
+
+Run the generate command first:
+```bash
+uv run python tools/rag_eval_cli.py generate
+```
+
+### Tests Fail with "Note mapping file not found"
+
+Run the upload command first:
+```bash
+uv run python tools/rag_eval_cli.py upload --nextcloud-url http://localhost:8000 --username admin --password admin
+```
+
+### Tests Fail with "MCP sampling client not yet implemented"
+
+The `mcp_sampling_client` fixture is a placeholder. You need to implement MCP client creation with sampling support. See the TODO in `conftest.py`.
+
+### Upload Command Fails
+
+Common issues:
+1. **Nextcloud not running**: Ensure Nextcloud is accessible at the URL
+2. **Invalid credentials**: Verify username/password
+3. **Notes app not installed**: Install Notes app in Nextcloud
+4. **Network timeout**: Increase timeout in CLI (currently 60s)
+
+### LLM Timeout
+
+If ground truth generation times out:
+1. Increase timeout in `llm_providers.py` (currently 10 min)
+2. Use a faster model: `--model llama3.2:1b`
+3. Check Ollama/Anthropic service availability
+
+### Dataset Download Fails
+
+The nfcorpus dataset is downloaded automatically. If download fails:
+1. Check internet connection
+2. Manually download from: https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nfcorpus.zip
+3. Extract to `tests/rag_evaluation/fixtures/nfcorpus/`
+4. Or use HuggingFace datasets cache: `~/.cache/huggingface/datasets/BeIR___nfcorpus/`
+
+### Vector Sync Not Indexing Documents
+
+After uploading, vector sync must index the documents:
+1. Check vector sync is enabled in Nextcloud
+2. Trigger manual sync if needed
+3. Wait for background job to process all documents
+4. Verify in Qdrant that vectors exist for uploaded notes
+
+## References
+
+- [ADR-013: RAG Evaluation Testing Framework](../../docs/ADR-013-rag-evaluation.md)
+- [ADR-008: MCP Sampling for Semantic Search](../../docs/ADR-008-mcp-sampling-for-semantic-search.md)
+- [BeIR Benchmark](https://github.com/beir-cellar/beir)
+- [NFCorpus Dataset](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/)
@@ -0,0 +1 @@
+"""RAG evaluation tests for the Nextcloud MCP semantic search system."""
@@ -0,0 +1,145 @@
+"""Pytest fixtures for RAG evaluation tests.
+
+IMPORTANT: Before running these tests, you must:
+1. Generate ground truth: uv run python tools/rag_eval_cli.py generate
+2. Upload corpus: uv run python tools/rag_eval_cli.py upload --nextcloud-url http://localhost:8000 --username admin --password admin
+
+This ensures that the ground truth and note mappings are available.
+"""
+
+import json
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from tests.rag_evaluation.llm_providers import create_llm_provider
+
+# Paths
+FIXTURES_DIR = Path(__file__).parent / "fixtures"
+GROUND_TRUTH_FILE = FIXTURES_DIR / "ground_truth.json"
+NOTE_MAPPING_FILE = FIXTURES_DIR / "note_mapping.json"
+
+
+@pytest.fixture(scope="session")
+def ground_truth_data() -> list[dict[str, Any]]:
+    """Load pre-generated ground truth data.
+
+    Returns:
+        List of test cases with query, ground truth answer, and expected doc IDs
+
+    Raises:
+        FileNotFoundError: If ground_truth.json doesn't exist
+    """
+    if not GROUND_TRUTH_FILE.exists():
+        raise FileNotFoundError(
+            f"Ground truth file not found: {GROUND_TRUTH_FILE}\n"
+            "Run: uv run python tools/rag_eval_cli.py generate"
+        )
+
+    with open(GROUND_TRUTH_FILE) as f:
+        return json.load(f)
+
+
+@pytest.fixture(scope="session")
+def note_mapping() -> dict[str, int]:
+    """Load document ID → note ID mapping.
+
+    Returns:
+        Dict mapping nfcorpus document ID to Nextcloud note ID
+
+    Raises:
+        FileNotFoundError: If note_mapping.json doesn't exist
+    """
+    if not NOTE_MAPPING_FILE.exists():
+        raise FileNotFoundError(
+            f"Note mapping file not found: {NOTE_MAPPING_FILE}\n"
+            "Run: uv run python tools/rag_eval_cli.py upload --nextcloud-url ... --username ... --password ..."
+        )
+
+    with open(NOTE_MAPPING_FILE) as f:
+        return json.load(f)
+
+
+@pytest.fixture(scope="session")
+def nfcorpus_test_data(
+    ground_truth_data: list[dict[str, Any]],
+    note_mapping: dict[str, int],
+):
+    """Prepare nfcorpus test data for evaluation.
+
+    This fixture combines ground truth answers with note mappings to create
+    test cases ready for retrieval and generation quality tests.
+
+    Args:
+        ground_truth_data: Pre-generated ground truth answers
+        note_mapping: Document ID → note ID mapping
+
+    Returns:
+        List of test cases with query, ground truth, expected doc IDs, and note IDs
+    """
+    test_cases = []
+
+    for gt in ground_truth_data:
+        # Map expected document IDs to note IDs
+        expected_note_ids = [
+            note_mapping.get(doc_id)
+            for doc_id in gt["expected_document_ids"]
+            if doc_id in note_mapping
+        ]
+
+        # Filter out None values (docs that weren't uploaded)
+        expected_note_ids = [nid for nid in expected_note_ids if nid is not None]
+
+        test_cases.append(
+            {
+                "query_id": gt["query_id"],
+                "query_text": gt["query_text"],
+                "ground_truth_answer": gt["ground_truth_answer"],
+                "expected_document_ids": gt["expected_document_ids"],
+                "expected_note_ids": expected_note_ids,
+                "highly_relevant_count": gt["highly_relevant_count"],
+            }
+        )
+
+    return test_cases
+
+
+@pytest.fixture(scope="session")
+async def evaluation_llm():
+    """Create LLM provider for evaluation (separate from MCP client).
+
+    Environment variables:
+      RAG_EVAL_PROVIDER: Provider type (ollama or anthropic)
+      RAG_EVAL_OLLAMA_BASE_URL: Ollama base URL (or OLLAMA_HOST)
+      RAG_EVAL_OLLAMA_MODEL: Ollama model name
+      RAG_EVAL_ANTHROPIC_API_KEY: Anthropic API key
+      RAG_EVAL_ANTHROPIC_MODEL: Anthropic model name
+
+    Returns:
+        LLM provider instance (OllamaProvider or AnthropicProvider)
+    """
+    llm = create_llm_provider()
+    yield llm
+    await llm.close()
+
+
+@pytest.fixture(scope="session")
+async def mcp_sampling_client():
+    """Placeholder for an MCP client that supports sampling for RAG generation.
+
+    The intended fixture would provide an MCP client configured to support
+    sampling, which is required for testing the nc_semantic_search_answer tool.
+
+    TODO: Implement MCP client with sampling support
+
+    Currently nothing is returned: the body only calls ``pytest.skip``, so
+    every test that requests this fixture is skipped.
+    """
+    # TODO: Implement MCP client creation with sampling support
+    # This will require:
+    # 1. Creating an MCP client configured for sampling
+    # 2. Authenticating with Nextcloud
+    # 3. Ensuring sampling is enabled
+    pytest.skip("MCP sampling client not yet implemented")
@@ -0,0 +1,111 @@
+"""LLM provider abstraction for RAG evaluation.
+
+DEPRECATED: This module is maintained for backward compatibility with RAG evaluation tests.
+New code should use nextcloud_mcp_server.providers directly.
+
+Supports Ollama (local), Anthropic (cloud), Bedrock (AWS), and OpenAI (cloud) providers
+for both ground truth generation and evaluation.
+"""
+
+import os
+
+from nextcloud_mcp_server.providers import (
+    AnthropicProvider,
+    BedrockProvider,
+    OllamaProvider,
+    OpenAIProvider,
+    Provider,
+)
+
+
+def create_llm_provider(
+    provider: str | None = None,
+    ollama_base_url: str | None = None,
+    ollama_model: str | None = None,
+    anthropic_api_key: str | None = None,
+    anthropic_model: str | None = None,
+    bedrock_region: str | None = None,
+    bedrock_model: str | None = None,
+    openai_api_key: str | None = None,
+    openai_base_url: str | None = None,
+    openai_model: str | None = None,
+) -> Provider:
+    """Create an LLM provider from environment variables or arguments.
+
+    Args:
+        provider: Provider type ('ollama', 'anthropic', 'bedrock', or 'openai').
+            Defaults to RAG_EVAL_PROVIDER env var or 'ollama'
+        ollama_base_url: Ollama base URL. Defaults to RAG_EVAL_OLLAMA_BASE_URL or 'http://localhost:11434'
+        ollama_model: Ollama model. Defaults to RAG_EVAL_OLLAMA_MODEL or 'llama3.2:1b'
+        anthropic_api_key: Anthropic API key. Defaults to RAG_EVAL_ANTHROPIC_API_KEY env var
+        anthropic_model: Anthropic model. Defaults to RAG_EVAL_ANTHROPIC_MODEL or 'claude-3-5-sonnet-20241022'
+        bedrock_region: AWS region. Defaults to RAG_EVAL_BEDROCK_REGION or AWS_REGION env var
+        bedrock_model: Bedrock model ID. Defaults to RAG_EVAL_BEDROCK_MODEL or
+            'anthropic.claude-3-sonnet-20240229-v1:0'
+        openai_api_key: OpenAI API key. Defaults to OPENAI_API_KEY env var
+        openai_base_url: OpenAI base URL. Defaults to OPENAI_BASE_URL (for GitHub Models API)
+        openai_model: OpenAI model. Defaults to OPENAI_GENERATION_MODEL or 'gpt-4o-mini'
+
+    Returns:
+        Provider instance
+
+    Raises:
+        ValueError: If provider is invalid or required credentials are missing
+    """
+    # Get provider from args or env
+    provider = provider or os.environ.get("RAG_EVAL_PROVIDER", "ollama")
+
+    if provider == "ollama":
+        # Try RAG_EVAL_OLLAMA_BASE_URL, then OLLAMA_HOST, then default
+        base_url = (
+            ollama_base_url
+            or os.environ.get("RAG_EVAL_OLLAMA_BASE_URL")
+            or os.environ.get("OLLAMA_HOST")
+            or "http://localhost:11434"
+        )
+        model = ollama_model or os.environ.get("RAG_EVAL_OLLAMA_MODEL", "llama3.2:1b")
+        return OllamaProvider(
+            base_url=base_url, embedding_model=None, generation_model=model
+        )
+
+    elif provider == "anthropic":
+        api_key = anthropic_api_key or os.environ.get("RAG_EVAL_ANTHROPIC_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "Anthropic API key required. Set RAG_EVAL_ANTHROPIC_API_KEY environment variable."
+            )
+        model = anthropic_model or os.environ.get(
+            "RAG_EVAL_ANTHROPIC_MODEL", "claude-3-5-sonnet-20241022"
+        )
+        return AnthropicProvider(api_key=api_key, model=model)
+
+    elif provider == "bedrock":
+        region = bedrock_region or os.environ.get(
+            "RAG_EVAL_BEDROCK_REGION", os.environ.get("AWS_REGION", "us-east-1")
+        )
+        model = bedrock_model or os.environ.get(
+            "RAG_EVAL_BEDROCK_MODEL", "anthropic.claude-3-sonnet-20240229-v1:0"
+        )
+        return BedrockProvider(
+            region_name=region, embedding_model=None, generation_model=model
+        )
+
+    elif provider == "openai":
+        api_key = openai_api_key or os.environ.get("OPENAI_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "OpenAI API key required. Set OPENAI_API_KEY environment variable."
+            )
+        base_url = openai_base_url or os.environ.get("OPENAI_BASE_URL")
+        model = openai_model or os.environ.get("OPENAI_GENERATION_MODEL", "gpt-4o-mini")
+        return OpenAIProvider(
+            api_key=api_key,
+            base_url=base_url,
+            embedding_model=None,
+            generation_model=model,
+        )
+
+    else:
+        raise ValueError(
+            f"Invalid provider: {provider}. Must be 'ollama', 'anthropic', 'bedrock', or 'openai'."
+        )
@@ -0,0 +1,139 @@
+"""Tests for RAG generation quality (Answer Correctness metric).
+
+These tests evaluate whether the MCP client LLM generates factually correct
+answers from retrieved context using the nc_semantic_search_answer tool.
+
+Metric: Answer Correctness
+- Measures: Is the generated answer factually correct?
+- Method: LLM-as-judge - Compare RAG answer vs ground truth (binary true/false)
+- Evaluation: External LLM evaluates semantic equivalence
+"""
+
+import pytest
+
+
+@pytest.mark.integration
+async def test_answer_correctness(
+    mcp_sampling_client,
+    evaluation_llm,
+    nfcorpus_test_data,
+):
+    """Test that RAG system generates factually correct answers.
+
+    For each test query:
+    1. Execute full RAG pipeline via nc_semantic_search_answer MCP tool
+    2. Extract generated answer from RAG response
+    3. Use LLM-as-judge to compare against ground truth (binary true/false)
+    4. Assert answer is semantically equivalent to ground truth
+
+    This tests the quality of the generation component (MCP client LLM).
+    """
+    results_summary = []
+
+    for test_case in nfcorpus_test_data:
+        query = test_case["query_text"]
+        ground_truth = test_case["ground_truth_answer"]
+
+        print(f"\n{'=' * 80}")
+        print(f"Query: {query}")
+
+        # Execute full RAG pipeline
+        print("Executing RAG pipeline...")
+        rag_result = await mcp_sampling_client.call_tool(
+            "nc_semantic_search_answer",
+            arguments={"query": query, "limit": 5},
+        )
+
+        rag_answer = rag_result["generated_answer"]
+
+        print(f"RAG Answer preview: {rag_answer[:200]}...")
+        print(f"Ground Truth preview: {ground_truth[:200]}...")
+
+        # LLM-as-judge evaluation
+        evaluation_prompt = f"""Compare these two answers and respond with only TRUE or FALSE.
+
+Question: {query}
+
+Generated Answer: {rag_answer}
+
+Ground Truth Answer: {ground_truth}
+
+Are these answers semantically equivalent (do they convey the same factual information)?
+Respond with only: TRUE or FALSE"""
+
+        print("Evaluating answer correctness...")
+        evaluation_result = await evaluation_llm.generate(
+            evaluation_prompt,
+            max_tokens=10,
+        )
+
+        is_correct = evaluation_result.strip().upper() == "TRUE"
+
+        result = {
+            "query_id": test_case["query_id"],
+            "query": query,
+            "rag_answer_length": len(rag_answer),
+            "ground_truth_length": len(ground_truth),
+            "is_correct": is_correct,
+            "evaluation_result": evaluation_result.strip(),
+        }
+        results_summary.append(result)
+
+        print(f"  Evaluation: {evaluation_result.strip()}")
+        print(f"  Status: {'✓ CORRECT' if is_correct else '✗ INCORRECT'}")
+
+        # Assert answer correctness
+        assert is_correct, (
+            f"Answer mismatch for query: {query}\n\n"
+            f"Generated Answer:\n{rag_answer}\n\n"
+            f"Ground Truth:\n{ground_truth}\n\n"
+            f"Evaluation: {evaluation_result.strip()}"
+        )
+
+    # Print summary
+    print(f"\n{'=' * 80}")
+    print("Answer Correctness Summary:")
+    print(f"  Total queries: {len(results_summary)}")
+    print(f"  Correct: {sum(r['is_correct'] for r in results_summary)}")
+    print(f"  Incorrect: {sum(not r['is_correct'] for r in results_summary)}")
+    accuracy = sum(r["is_correct"] for r in results_summary) / len(results_summary)
+    print(f"  Accuracy: {accuracy:.2%}")
+    print(f"{'=' * 80}")
+
+
+@pytest.mark.integration
+async def test_answer_contains_sources(mcp_sampling_client, nfcorpus_test_data):
+    """Test that RAG answers include source citations.
+
+    This is a basic quality check - we verify that the nc_semantic_search_answer
+    tool returns both a generated answer and source documents.
+    """
+    for test_case in nfcorpus_test_data:
+        query = test_case["query_text"]
+
+        # Execute RAG pipeline
+        rag_result = await mcp_sampling_client.call_tool(
+            "nc_semantic_search_answer",
+            arguments={"query": query, "limit": 5},
+        )
+
+        # Check response structure
+        assert "generated_answer" in rag_result, "Response missing 'generated_answer'"
+        assert "sources" in rag_result, "Response missing 'sources'"
+
+        # Check sources are provided
+        sources = rag_result["sources"]
+        assert len(sources) > 0, f"No sources returned for query: {query}"
+
+        # Check each source has required fields
+        for i, source in enumerate(sources):
+            assert "document_id" in source or "id" in source, (
+                f"Source {i} missing document ID"
+            )
+            assert "excerpt" in source or "content" in source or "text" in source, (
+                f"Source {i} missing content"
+            )
+
+        print(f"Query: {query}")
+        print(f"  Sources provided: {len(sources)}")
+        print("  Status: ✓ PASS")
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`"""RAG evaluation tests for the Nextcloud MCP semantic search system."""`