feat: auto-derive oidc.discovery_url from NEXTCLOUD_HOST

When OIDC_DISCOVERY_URL is not explicitly set, the status endpoint now auto-derives the discovery URL from NEXTCLOUD_HOST using the standard well-known path. This allows Astrolabe to discover OIDC endpoints without requiring explicit OIDC configuration. The oidc block is now included in the status response regardless of auth mode when a discovery URL is available (explicit or derived), enabling smoother auth mode transitions. Closes #1
bump: version 0.58.2 → 0.58.3
2026-03-29 12:56:50 -06:00 · 2026-03-16 17:38:05 +00:00 · 2026-03-16 18:37:45 +01:00 · 2026-03-16 17:22:55 +00:00 · 2026-03-14 15:56:15 +00:00 · 2026-03-14 16:55:59 +01:00
282 changed files with 60794 additions and 4615 deletions
@@ -5,3 +5,5 @@
 !uv.lock

 !nextcloud_mcp_server/**/*.py
+!nextcloud_mcp_server/**/*.html
+!nextcloud_mcp_server/auth/static/*
@@ -7,26 +7,158 @@ on:

 jobs:
  bump-version:
-    if: "!startsWith(github.event.head_commit.message, 'bump:')"
+    if: "!startsWith(github.event.head_commit.message, 'bump:') && !startsWith(github.event.head_commit.message, 'chore(release):')"
    runs-on: ubuntu-latest
-    name: "Bump version and create changelog with commitizen"
+    name: "Bump version and create changelog for monorepo components"
    permissions:
      contents: write
      packages: write
    steps:
      - name: Check out
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0
          token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
-      - name: Create bump and changelog
-        uses: commitizen-tools/commitizen-action@5b0848cd060263e24602d1eba03710e056ef7711 # 0.24.0
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
        with:
-          github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
-          changelog_increment_filename: body.md
-      - name: Release
-        uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
-        with:
-          body_path: "body.md"
-          tag_name: v${{ env.REVISION }}
-          token: ${{ secrets.GITHUB_TOKEN }}
+          python-version: '3.11'
+
+      - name: Install uv
+        run: |
+          curl -LsSf https://astral.sh/uv/install.sh | sh
+          echo "$HOME/.cargo/bin" >> $GITHUB_PATH
+
+      - name: Configure git
+        run: |
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+      - name: Detect and bump component versions
+        id: bump
+        run: |
+          set -euo pipefail
+
+          # Track which components were bumped
+          BUMPED_COMPONENTS=""
+
+          # Helper function to check for commits with specific scope since last tag
+          has_commits_since_tag() {
+            local tag_pattern="$1"
+            local scope_pattern="$2"
+
+            # Get the most recent tag matching the pattern
+            local last_tag=$(git tag --sort=-creatordate | grep -E "^${tag_pattern}" | head -n 1 || echo "")
+
+            if [ -z "$last_tag" ]; then
+              # No previous tag, check all commits on master
+              local commit_range="master"
+            else
+              # Check commits since last tag
+              local commit_range="${last_tag}..HEAD"
+            fi
+
+            # Count commits matching the scope pattern
+            local commit_count=$(git log "$commit_range" --oneline --grep="^${scope_pattern}" -E | wc -l)
+
+            if [ "$commit_count" -gt 0 ]; then
+              echo "Found $commit_count commits for scope '$scope_pattern' since $last_tag"
+              return 0
+            else
+              echo "No commits found for scope '$scope_pattern' since $last_tag"
+              return 1
+            fi
+          }
+
+          # Bump MCP server (default - all commits except helm scope)
+          echo "Checking MCP server for version bump..."
+
+          # Get the most recent MCP tag
+          last_mcp_tag=$(git tag --sort=-creatordate | grep -E "^v[0-9]" | head -n 1 || echo "")
+
+          if [ -z "$last_mcp_tag" ]; then
+            commit_range="master"
+          else
+            commit_range="${last_mcp_tag}..HEAD"
+          fi
+
+          # Count conventional commits that are NOT scoped to helm
+          mcp_commit_count=$(git log "$commit_range" --oneline --grep="^(feat|fix|docs|refactor|perf|test|build|ci|chore)" -E | \
+            { grep -v "(helm)" || true; } | wc -l)
+
+          MCP_BUMPED=false
+          if [ "$mcp_commit_count" -gt 0 ]; then
+            echo "Found $mcp_commit_count commits for MCP server since $last_mcp_tag"
+            echo "Bumping MCP server version..."
+            ./scripts/bump-mcp.sh
+            BUMPED_COMPONENTS="$BUMPED_COMPONENTS mcp"
+            MCP_BUMPED=true
+          else
+            echo "No commits found for MCP server since $last_mcp_tag"
+          fi
+
+          # Bump Helm chart (scope: helm OR when MCP appVersion changes)
+          echo "Checking Helm chart for version bump..."
+          HELM_HAS_COMMITS=false
+          if has_commits_since_tag "nextcloud-mcp-server-" "(feat|fix|docs|refactor|perf|test|build|ci|chore)\(helm\)(!)?:"; then
+            HELM_HAS_COMMITS=true
+          fi
+
+          if [ "$HELM_HAS_COMMITS" = true ]; then
+            echo "Bumping Helm chart version (helm-scoped commits)..."
+            ./scripts/bump-helm.sh
+            BUMPED_COMPONENTS="$BUMPED_COMPONENTS helm"
+          elif [ "$MCP_BUMPED" = true ]; then
+            echo "Bumping Helm chart version (appVersion changed)..."
+            ./scripts/bump-helm.sh --increment PATCH
+            BUMPED_COMPONENTS="$BUMPED_COMPONENTS helm"
+          fi
+
+          # Output summary
+          if [ -z "$BUMPED_COMPONENTS" ]; then
+            echo "No components required version bumps"
+            echo "bumped=false" >> $GITHUB_OUTPUT
+          else
+            echo "Bumped components:$BUMPED_COMPONENTS"
+            echo "bumped=true" >> $GITHUB_OUTPUT
+            echo "components=$BUMPED_COMPONENTS" >> $GITHUB_OUTPUT
+          fi
+
+      - name: Push tags
+        if: steps.bump.outputs.bumped == 'true'
+        run: |
+          git push
+          git push --tags
+          echo "Pushed tags for components:${{ steps.bump.outputs.components }}"
+
+      - name: Summary
+        run: |
+          if [ "${{ steps.bump.outputs.bumped }}" == "true" ]; then
+            echo "## Version Bump Summary" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo "The following components were bumped:" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+
+            for component in ${{ steps.bump.outputs.components }}; do
+              case $component in
+                mcp)
+                  tag=$(git tag --sort=-creatordate | grep -E '^v[0-9]' | head -n 1)
+                  echo "- **MCP Server**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
+                  ;;
+                helm)
+                  tag=$(git tag --sort=-creatordate | grep -E '^nextcloud-mcp-server-' | head -n 1)
+                  echo "- **Helm Chart**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
+                  ;;
+              esac
+            done
+
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo "Tags have been pushed and release workflows will trigger automatically." >> $GITHUB_STEP_SUMMARY
+          else
+            echo "## Version Bump Summary" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo "✅ No version bumps required - no relevant commits found since last release." >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo "The workflow completed successfully with no changes." >> $GITHUB_STEP_SUMMARY
+          fi
@@ -0,0 +1,58 @@
+name: Claude Code Review
+
+on:
+  pull_request:
+    types: [opened, synchronize]
+    # Optional: Only run on specific file changes
+    # paths:
+    #   - "src/**/*.ts"
+    #   - "src/**/*.tsx"
+    #   - "src/**/*.js"
+    #   - "src/**/*.jsx"
+
+jobs:
+  claude-review:
+    # Optional: Filter by PR author
+    # if: |
+    #   github.event.pull_request.user.login == 'external-contributor' ||
+    #   github.event.pull_request.user.login == 'new-developer' ||
+    #   github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
+
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code Review
+        id: claude-review
+        uses: anthropics/claude-code-action@cd77b50d2b0808657f8e6774085c8bf54484351c # v1.0.72
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+          allowed_bots: "renovate-bot-cbcoutinho"
+          prompt: |
+            REPO: ${{ github.repository }}
+            PR NUMBER: ${{ github.event.pull_request.number }}
+
+            Please review this pull request and provide feedback on:
+            - Code quality and best practices
+            - Potential bugs or issues
+            - Performance considerations
+            - Security concerns
+            - Test coverage
+
+            Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
+
+            Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
+
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
+          claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
+
@@ -0,0 +1,50 @@
+name: Claude Code
+
+on:
+  issue_comment:
+    types: [created]
+  pull_request_review_comment:
+    types: [created]
+  issues:
+    types: [opened, assigned]
+  pull_request_review:
+    types: [submitted]
+
+jobs:
+  claude:
+    if: |
+      (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
+      (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
+      (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+      issues: read
+      id-token: write
+      actions: read # Required for Claude to read CI results on PRs
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 1
+
+      - name: Run Claude Code
+        id: claude
+        uses: anthropics/claude-code-action@cd77b50d2b0808657f8e6774085c8bf54484351c # v1.0.72
+        with:
+          claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
+
+          # This is an optional setting that allows Claude to read CI results on PRs
+          additional_permissions: |
+            actions: read
+
+          # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
+          # prompt: 'Update the pull request description to include a summary of changes.'
+
+          # Optional: Add claude_args to customize behavior and configuration
+          # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
+          # or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
+          # claude_args: '--allowed-tools Bash(gh pr:*)'
+
@@ -2,7 +2,8 @@ name: Build and Publish Docker Image

 on:
  push:
-    tags: ["*"]
+    tags:
+      - "v*"

 jobs:
  build-and-push:
@@ -12,11 +13,11 @@ jobs:
      packages: write
    steps:
      - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5
+        uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5.10.0
        with:
          # list of Docker images to use as base name for tags
          images: |
@@ -33,18 +34,18 @@ jobs:
            type=raw,value=latest,enable={{is_default_branch}}

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3
+        uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0

      - name: Log in to GitHub Container Registry
        if: github.event_name != 'pull_request'
-        uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3
+        uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3.7.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push Docker image
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 # v6
+        uses: docker/build-push-action@10e90e3645eae34f1e60eeb005ba3a3d33f178e8 # v6.19.2
        with:
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
@@ -4,6 +4,7 @@ on:
  push:
    tags:
      - v*
+      - nextcloud-mcp-server-*

 jobs:
  release:
@@ -14,7 +15,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          fetch-depth: 0

@@ -38,6 +39,8 @@ jobs:

      - name: Run chart-releaser
        uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0
+        with:
+          skip_existing: true
        env:
          CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"

@@ -0,0 +1,105 @@
+name: RAG Evaluation
+
+on:
+  workflow_dispatch:
+    inputs:
+      manual_path:
+        description: 'Path to Nextcloud User Manual PDF in Nextcloud'
+        required: false
+        default: 'Nextcloud Manual.pdf'
+      embedding_model:
+        description: 'OpenAI embedding model'
+        required: false
+        default: 'openai/text-embedding-3-small'
+      generation_model:
+        description: 'OpenAI generation model'
+        required: false
+        default: 'openai/gpt-4o-mini'
+
+jobs:
+  rag-evaluation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    permissions:
+      models: read
+
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: Run docker compose with vector sync
+        uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0
+        with:
+          compose-file: |
+            ./docker-compose.yml
+            ./docker-compose.ci.yml
+          up-flags: "--build"
+        env:
+          # Environment variables passed to docker-compose.ci.yml
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+          VECTOR_SYNC_SCAN_INTERVAL: "5"
+
+      - name: Install the latest version of uv
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
+
+      - name: Wait for Nextcloud to be ready
+        run: |
+          echo "Waiting for Nextcloud..."
+          max_attempts=60
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "Service did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
+            sleep 5
+          done
+          echo "Nextcloud is ready."
+
+      - name: Wait for MCP server to be ready
+        run: |
+          echo "Waiting for MCP server..."
+          max_attempts=30
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8000/health/live | grep -q "200"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "MCP server did not become ready in time."
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: MCP not ready, sleeping for 2 seconds..."
+            sleep 2
+          done
+          echo "MCP server is ready."
+
+      - name: Run RAG evaluation tests
+        env:
+          NEXTCLOUD_HOST: "http://localhost:8080"
+          NEXTCLOUD_USERNAME: "admin"
+          NEXTCLOUD_PASSWORD: "admin"
+          RAG_MANUAL_PATH: ${{ inputs.manual_path }}
+          OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
+          OPENAI_BASE_URL: "https://models.github.ai/inference"
+          OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
+          OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
+        run: |
+          uv run pytest tests/integration/test_rag.py -v --log-cli-level=INFO --provider openai
+
+      - name: Capture MCP container logs
+        if: always()
+        run: |
+          echo "=== MCP Container Logs ==="
+          docker compose logs mcp --tail=500
+
+      - name: Upload test results
+        if: always()
+        uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+        with:
+          name: rag-evaluation-results
+          path: |
+            pytest-results.xml
+          retention-days: 30
@@ -18,9 +18,9 @@ jobs:
      contents: read
    steps:
      - name: Checkout
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Install uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
      - name: Install Python 3.11
        run: uv python install 3.11
      - name: Build
@@ -1,4 +1,4 @@
-name: Docker Compose Action
+name: Tests

 on:
  pull_request:
@@ -9,80 +9,205 @@ jobs:
  linting:
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
      - name: Check format
-        run: |
-          uv run --frozen ruff format --diff
+        run: uv run --frozen ruff format --diff
      - name: Linting
-        run: |
-          uv run --frozen ruff check
-      - name: Linting
-        run: |
-          uv run --frozen ty check -- nextcloud_mcp_server
+        run: uv run --frozen ruff check
+      - name: Type check
+        run: uv run --frozen ty check -- nextcloud_mcp_server

+  unit-test:
+    runs-on: ubuntu-latest
+    needs: [linting]
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - name: Install the latest version of uv
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0
+      - name: Run unit tests
+        run: uv run pytest -v -m unit -o "addopts=-p no:asyncio"

  integration-test:
    runs-on: ubuntu-latest
+    needs: [linting]
+    strategy:
+      fail-fast: false
+      matrix:
+        nextcloud_version:
+          - "31"
+          - "32"
+          # - "33"  # Disabled until all upstream apps support NC 33
+        mode:
+          - "single-user"
+          - "multi-user-basic"
+          - "oauth"
+          - "login-flow"
+        include:
+          # Version-specific image pins — Renovate updates these via customManagers
+          # renovate: datasource=docker depName=docker.io/library/nextcloud
+          - nextcloud_version: "31"
+            nextcloud_image: "docker.io/library/nextcloud:31.0.14@sha256:9bf3fae91aad4dca3eff02c1f71df8d5c6705a349065fb537aa5c5ef578f1013"
+          # renovate: datasource=docker depName=docker.io/library/nextcloud
+          - nextcloud_version: "32"
+            nextcloud_image: "docker.io/library/nextcloud:32.0.6@sha256:5c4e09f72f096cd68379a8ae69f71e61d13da5a07430fc4a17c702a14e6a4267"
+          # renovate: datasource=docker depName=docker.io/library/nextcloud
+          # Disabled until all upstream apps support NC 33
+          # - nextcloud_version: "33"
+          #   nextcloud_image: "docker.io/library/nextcloud:33.0.0@sha256:d53f6cb35b0712aa890a5e4a8ca21043d6fcd390f38c55b710816dd7cbc2edc0"
+
+          # Mode-specific properties
+          - mode: single-user
+            profile: single-user
+            markers: "(smoke and not oauth and not keycloak and not login_flow and not multi_user_basic) or (integration and not oauth and not keycloak and not login_flow and not multi_user_basic)"
+            wait-port: 8000
+            mcp-internal-url: "http://mcp:8000"
+            needs-playwright: false
+            extra-args: >-
+              --ignore=tests/integration/test_qdrant_collection_creation.py
+              --ignore=tests/rag_evaluation/
+
+          - mode: multi-user-basic
+            profile: multi-user-basic
+            markers: "multi_user_basic"
+            wait-port: 8003
+            mcp-internal-url: "http://mcp-multi-user-basic:8000"
+            needs-playwright: true
+            extra-args: ""
+
+          - mode: oauth
+            profile: oauth
+            markers: "oauth and not keycloak"
+            wait-port: 8001
+            mcp-internal-url: "http://mcp-oauth:8001"
+            needs-playwright: true
+            extra-args: ""
+
+          - mode: login-flow
+            profile: login-flow
+            markers: "login_flow"
+            wait-port: 8004
+            mcp-internal-url: "http://mcp-login-flow:8004"
+            needs-playwright: true
+            extra-args: ""
+
+    name: integration (${{ matrix.mode }} / nc${{ matrix.nextcloud_version }})

    steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
        with:
          submodules: 'true'

-
-      ###### Required to build OIDC App ######
-
-      - name: Set up php 8.4
-        uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
+      - name: Set up PHP 8.4
+        if: matrix.mode != 'single-user'
+        uses: shivammathur/setup-php@44454db4f0199b8b9685a5d763dc37cbf79108e1 # 2.36.0
        with:
          php-version: 8.4
          coverage: none

-      - name: Install OIDC app composer dependencies
+      # OIDC app installed from app store (dev mount removed from docker-compose.yml)
+
+      - name: Set up Node.js
+        if: matrix.mode != 'single-user'
+        uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
+        with:
+          node-version: 24
+
+      - name: Build Astrolabe app
+        if: matrix.mode != 'single-user'
        run: |
-          cd third_party/oidc
-          composer install --no-dev
-
-      ###### Required to build OIDC App ######
-
+          cd third_party/astrolabe
+          composer install --no-dev --optimize-autoloader
+          npm ci
+          npm run build

+      # Start services with the appropriate profile
      - name: Run docker compose
-        uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
+        uses: hoverkraft-tech/compose-action@4894d2492015c1774ee5a13a95b1072093087ec3 # v2.5.0
        with:
          compose-file: "./docker-compose.yml"
-          #compose-flags: "--profile qdrant"
+          compose-flags: "--profile ${{ matrix.profile }}"
          up-flags: "--build"
+        env:
+          MCP_SERVER_URL: ${{ matrix.mcp-internal-url }}
+          NEXTCLOUD_IMAGE: ${{ matrix.nextcloud_image }}

      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7.6.0

-      - name: Install Playwright dependencies
-        run: |
-          uv run playwright install chromium --with-deps
+      - name: Install Playwright
+        if: matrix.needs-playwright
+        run: uv run playwright install chromium --with-deps

-      - name: Wait for service to be ready
+      # Wait for Nextcloud to be healthy
+      - name: Wait for Nextcloud
        run: |
-          echo "Waiting for service at http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info to return 401..."
+          echo "Waiting for Nextcloud at http://localhost:8080..."
          max_attempts=60
          attempt=0
-          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
+          until curl -sSf http://localhost:8080/status.php 2>/dev/null | grep -q '"installed":true'; do
            attempt=$((attempt + 1))
            if [ $attempt -ge $max_attempts ]; then
-              echo "Service did not become ready in time."
+              echo "Nextcloud did not become ready in time."
+              docker compose logs app
              exit 1
            fi
-            echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
+            echo "Attempt $attempt/$max_attempts: Not ready, sleeping 5s..."
            sleep 5
          done
-          echo "Service is ready (returned 401)."
+          echo "Nextcloud is ready."

-      # Add subsequent steps here, e.g., running tests
-      - name: Run tests
+      # Wait for the MCP service to be healthy
+      - name: Wait for MCP service (${{ matrix.mode }})
+        run: |
+          echo "Waiting for MCP service on port ${{ matrix.wait-port }}..."
+          max_attempts=30
+          attempt=0
+          until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:${{ matrix.wait-port }}/health 2>/dev/null | grep -qE "200|404|405"; do
+            attempt=$((attempt + 1))
+            if [ $attempt -ge $max_attempts ]; then
+              echo "MCP service did not become ready in time."
+              docker compose --profile ${{ matrix.profile }} logs
+              exit 1
+            fi
+            echo "Attempt $attempt/$max_attempts: Not ready, sleeping 5s..."
+            sleep 5
+          done
+          echo "MCP service is ready on port ${{ matrix.wait-port }}."
+
+      - name: Verify OIDC configuration
+        if: matrix.mode == 'oauth' || matrix.mode == 'login-flow'
+        run: |
+          echo "=== OIDC Discovery ==="
+          curl -s http://localhost:8080/.well-known/openid-configuration | jq .
+          echo "=== OIDC App Status ==="
+          docker compose exec -T app php occ app:list --output=json 2>/dev/null | jq '.enabled.oidc // "NOT INSTALLED"'
+
+      - name: Run tests (${{ matrix.mode }})
        env:
          NEXTCLOUD_HOST: "http://localhost:8080"
          NEXTCLOUD_USERNAME: "admin"
          NEXTCLOUD_PASSWORD: "admin"
        run: |
-          uv run pytest -v --log-cli-level=WARN --ignore=tests/manual
+          uv run pytest -v \
+            --log-cli-level=WARN \
+            -m '${{ matrix.markers }}' \
+            -o "addopts=-p no:asyncio" \
+            --timeout=300 \
+            ${{ matrix.extra-args }}
+
+      - name: Collect service logs on failure
+        if: failure()
+        run: docker compose --profile ${{ matrix.profile }} logs --tail=500 > /tmp/docker-compose-logs.txt 2>&1
+
+      - name: Upload debug artifacts
+        if: failure()
+        uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
+        with:
+          name: debug-${{ matrix.mode }}-nc${{ matrix.nextcloud_version }}
+          path: |
+            /tmp/*.png
+            /tmp/docker-compose-logs.txt
+          retention-days: 7
+          if-no-files-found: ignore
@@ -5,8 +5,14 @@ __pycache__/
 .env.local
 .env.*.local

+# Git
+worktrees/
+
 docker-compose.override.yml

 # Generated by pytest used to login users
 .nextcloud_oauth_*.json
 .playwright-mcp/
+
+# RAG Evaluation
+tests/rag_evaluation/fixtures/
@@ -1,6 +1,9 @@
-[submodule "oidc"]
-	path = third_party/oidc
-	url = https://github.com/cbcoutinho/oidc
 [submodule "third_party/oidc"]
 	path = third_party/oidc
 	url = https://github.com/cbcoutinho/oidc
+[submodule "third_party/notes"]
+	path = third_party/notes
+	url = https://github.com/cbcoutinho/notes
+[submodule "third_party/astrolabe"]
+	path = third_party/astrolabe
+	url = https://github.com/cbcoutinho/astrolabe
@@ -1,3 +1,776 @@
+# Changelog - MCP Server
+
+All notable changes to the Nextcloud MCP Server will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [PEP 440](https://peps.python.org/pep-0440/).
+
+## v0.65.0 (2026-03-03)
+
+### Feat
+
+- **auth**: implement OAuth AS proxy to fix audience mismatch (ADR-023)
+- **ci**: add Nextcloud version matrix (NC 31, 32, 33)
+- **helm**: add login-flow auth mode to Helm chart (ADR-022)
+- add Docker Compose profiles and Login Flow v2 service
+
+### Fix
+
+- replace assert with proper guard and invalidate scope cache after provisioning
+- disable NC rate limiting in dev/CI and add token endpoint diagnostics
+- address review feedback — security, caching, CI 429 retry
+- skip keycloak hook when profile inactive and update stale PRM test
+- address remaining PR #589 review findings
+- address PR #589 review findings
+- address PR review issues for Login Flow v2
+- address PR #589 review feedback (round 2)
+- **ci**: remove dev OIDC mount to fix HTTP 500 in single-user/multi-user-basic
+- **ci**: fix health check timeout and per-profile MCP server URL routing
+- **ci**: fix PHP gating, add multi-user-basic matrix entry, upload debug artifacts
+- address PR #589 review feedback for Login Flow v2
+- **ci**: fix integration test collection and skip Playwright in CI
+- **test**: fix 17 pre-existing unit test failures and add astrolabe CI build
+- **ci**: keep third_party mount, always build submodules in CI
+- **ci**: revert accidental third_party mount, use compose override for OIDC
+- **ci**: don't block integration matrix on unit-test failures
+
+## v0.64.5 (2026-03-03)
+
+### Fix
+
+- handle pythonvCard4 dict-format fields and missing phone numbers (#601)
+
+## v0.64.4 (2026-02-26)
+
+### Fix
+
+- **deps**: update dependency icalendar to v7
+
+## v0.64.3 (2026-02-21)
+
+### Fix
+
+- address PR #574 fourth review round
+- address PR #574 third review round
+- address PR #574 second review round
+- address PR #574 review comments
+- wrap raw list returns in response models to produce single TextContent block
+
+## v0.64.2 (2026-02-20)
+
+### Fix
+
+- address PR #571 review comments
+- resolve stale credentials causing astrolabe background sync test failures
+
+### Refactor
+
+- enforce PLC0415 (import-outside-top-level) for source code
+
+## v0.64.1 (2026-02-18)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.26,<1.27
+
+## v0.64.0 (2026-02-16)
+
+### Feat
+
+- add self-signed SSL certificate support for Nextcloud connections
+
+### Fix
+
+- add type: ignore for caldav ssl_verify_cert parameter
+- convert CA bundle path to ssl.SSLContext to avoid httpx deprecation warning
+
+## v0.63.5 (2026-02-16)
+
+### Refactor
+
+- remove stale astrolabe references from commitizen config
+- extract Astrolabe to separate repository
+
+## v0.63.4 (2026-02-08)
+
+### Fix
+
+- strip whitespace from category names when splitting
+- handle categories, recurrence_rule, attendees, and reminder_minutes in update_event
+
+## v0.63.3 (2026-02-08)
+
+### Fix
+
+- expand recurring events in date-range queries
+
+## v0.63.2 (2026-02-07)
+
+### Fix
+
+- use CalDAV time-range filter for calendar date range queries
+
+## v0.63.1 (2026-02-03)
+
+### Fix
+
+- **helm**: add backward compatibility for legacy persistence configs
+
+## v0.63.0 (2026-01-28)
+
+### Feat
+
+- **astrolabe**: add background token refresh job
+
+### Fix
+
+- **astrolabe**: add pagination and psalm fixes for token refresh
+- **astrolabe**: add locking to prevent token refresh race condition
+- **astrolabe**: add issued_at to on-demand token refresh
+
+## v0.62.0 (2026-01-26)
+
+### Feat
+
+- **scripts**: add database query helpers for development
+
+### Fix
+
+- **astrolabe**: resolve Psalm type errors in PDF preview code
+- **astrolabe**: fix Psalm baseline and ESLint import order
+- **astrolabe**: load pdfjs-dist externally to fix PDF viewer
+- **astrolabe**: improve error messages for authorization issues
+- **astrolabe**: rename OAuthController and fix app password check
+- **tests**: improve Astrolabe integration test reliability
+- **astrolabe**: update Plotly title attributes for v3 compatibility
+- **deps**: update dependency plotly.js-dist-min to v3
+
+### Refactor
+
+- **api**: split management.py into domain-focused modules
+- **astrolabe**: replace client-side PDF.js with server-side PyMuPDF rendering
+
+## v0.61.5 (2026-01-17)
+
+### Fix
+
+- **astrolabe**: improve token refresh error handling and validation
+- **astrolabe**: delete stale tokens when refresh fails
+- **astrolabe**: resolve CI failures for code quality checks
+- **astrolabe**: use internal URL for OAuth token refresh
+
+### Refactor
+
+- **astrolabe**: add PHP property types to fix Psalm errors
+- **astrolabe**: upgrade to @nextcloud/vue 9.3.3 API
+
+## v0.61.4 (2026-01-16)
+
+### Fix
+
+- **astrolabe**: Address reviewer feedback for hybrid mode
+- **astrolabe**: Fix NcSelect options and CSS loading
+- **astrolabe**: fix OAuth flow and settings UI for hybrid mode
+- **api**: return OIDC config in hybrid mode for Astrolabe OAuth flow
+
+## v0.61.3 (2026-01-15)
+
+### Fix
+
+- **astrolabe**: address review feedback for Vue 3 bindings
+- **astrolabe**: update Vue component bindings for Vue 3 compatibility
+
+## v0.61.2 (2026-01-15)
+
+### Fix
+
+- **ci**: bump helm chart version when MCP appVersion changes
+
+## v0.61.1 (2026-01-15)
+
+### Fix
+
+- **astrolabe**: define appName and appVersion for @nextcloud/vue
+
+## v0.61.0 (2026-01-14)
+
+### Feat
+
+- Add rate limiting and extract helpers for app password endpoints
+
+### Fix
+
+- Add missing annotations for deck remove/unassign operations
+- **auth**: Store app passwords locally for multi-user BasicAuth background sync
+
+### Refactor
+
+- Use get_settings() for vector sync enabled check
+- Extract storage helper and improve PHP error handling
+
+## v0.60.4 (2026-01-12)
+
+### Fix
+
+- **deck**: use correct endpoint for reorder_card to fix cross-stack moves
+
+## v0.60.3 (2025-12-31)
+
+### Fix
+
+- **deck**: Always preserve fields in update_card for partial updates
+- **astrolabe**: Fix CSS loading for Nextcloud apps
+- **astrolabe**: Fix revoke access button HTTP method mismatch
+
+## v0.60.2 (2025-12-29)
+
+### Fix
+
+- **oauth**: Enable browser OAuth routes for Management API in hybrid mode
+
+## v0.60.1 (2025-12-26)
+
+### Fix
+
+- **mcp**: Move all imports to the top of modules
+
+## v0.60.0 (2025-12-26)
+
+### Feat
+
+- Remove URL rewriting in favor of proper nextcloud config
+- **helm**: migrate to new environment variable naming convention
+- Migrate to vue 3
+- **astrolabe**: upgrade to Vue 3 and @nextcloud/vue 9
+
+### Fix
+
+- **tests**: Add singleton reset fixture to prevent anyio.WouldBlock errors
+- **tests**: Fix integration test failures in qdrant, sampling, and rag tests
+- **auth**: Skip issuer validation for management API tokens
+- Use settings.enable_offline_access for env var consolidation
+- Add required config.py attributes
+- **docker**: remove overwritehost to fix container-to-container DCR
+- **deps**: update dependency @nextcloud/vue to v9
+- **deps**: update dependency vue to v3
+
+### Refactor
+
+- **auth**: Decouple BasicAuth and OAuth authentication strategies
+
+## v0.59.1 (2025-12-22)
+
+### Fix
+
+- **helm**: set OIDC client env vars when using existingSecret
+- **helm**: trigger chart release workflow on helm chart tags
+
+## v0.59.0 (2025-12-22)
+
+### Feat
+
+- **helm**: add support for multi-user BasicAuth mode
+
+### Fix
+
+- **helm**: address PR #447 reviewer feedback
+- **helm**: include MCP server version bumps in changelog pattern
+
+## v0.58.0 (2025-12-22)
+
+### Feat
+
+- **config**: enable DCR for multi-user BasicAuth with offline access
+- **astrolabe**: implement app password provisioning for multi-user background sync
+- **config**: consolidate configuration with smart dependency resolution (ADR-021)
+
+## v0.57.0 (2025-12-20)
+
+### Feat
+
+- **auth**: add multi-user BasicAuth pass-through mode
+- **astrolabe**: add dynamic MCP server configuration for testing
+
+### Fix
+
+- **config**: address reviewer feedback
+
+### Refactor
+
+- **config**: centralize configuration validation and simplify startup
+
+## v0.56.2 (2025-12-20)
+
+### Fix
+
+- **astrolabe**: screenshots in info.xml
+- **astrolabe**: screenshots in info.xml
+
+## v0.56.1 (2025-12-19)
+
+### Fix
+
+- **astrolabe**: Update screenshots
+- **ci**: skip existing Helm chart releases to prevent duplicate release errors
+
+## v0.56.0 (2025-12-19)
+
+### Feat
+
+- **ci**: add --increment flag to bump scripts for manual version control
+
+### Fix
+
+- **astrolabe**: add contents:write permission to appstore workflow
+- **astrolabe**: update commitizen pattern to properly update info.xml version
+- **astrolabe**: prevent workflow failure when only helm/astrolabe commits exist
+- **astrolabe**: info.xml
+
+## v0.55.1 (2025-12-19)
+
+### Fix
+
+- **ci**: push all tags explicitly in bump workflow
+
+## v0.55.0 (2025-12-19)
+
+### BREAKING CHANGE
+
+- MCP server now bumps for ANY conventional commit except
+those explicitly scoped to helm or astrolabe.
+
+### Feat
+
+- **ci**: implement monorepo-aware version bumping workflow
+
+### Fix
+
+- **ci**: make MCP server default bump target for all non-scoped commits
+- **ci**: restrict docker build to MCP server tags only
+- **ci**: correct appstore-push-action version to v1.0.4
+
+## v0.54.0 (2025-12-19)
+
+### Feat
+
+- **astrolabe**: add Nextcloud App Store deployment automation
+- configure commitizen monorepo with independent versioning
+
+### Fix
+
+- **ci**: improve versioning and error handling
+- **ci**: address critical workflow and validation issues
+- **astrolabe**: address code review feedback
+
+## v0.53.0 (2025-12-19)
+
+### Feat
+
+- add Alembic database migration system
+- make chunk modal title clickable link to documents
+- add native Plotly hover styling for clickable points
+- add click interactivity to Plotly 3D scatter chart
+- improve chunk viewer with fixed navigation and markdown rendering
+- **astrolabe**: enable multi-select for document types and refactor PDF viewer
+- **auth**: implement refresh token rotation for Nextcloud OIDC
+- **astrolabe**: enhance unified search and add webhook management
+- **astrolabe**: add webhook management UI to admin settings
+- **astrolabe**: add OAuth token refresh and webhook presets
+- **search**: add file_path metadata and chunk offsets to search results
+- **astrolabe**: use proper icons and thumbnails in unified search
+- **astrolabe**: add admin search settings and enhanced UI
+- **astrolabe**: add unified search provider with clickable file links
+- **astrolabe**: add 3D PCA visualization for semantic search
+- **astrolabe**: add Nextcloud PHP app for MCP server management
+- **vector-sync**: enable background sync in OAuth mode
+
+### Fix
+
+- **security**: address critical security issues from PR #401 code review
+- **oauth**: enable PKCE for all clients and add token_broker to oauth_context
+- **astrolabe**: revert invalid files_pdfviewer URL for file links
+- resolve type checking warnings for CI
+- move Alembic to package submodule for Docker compatibility
+- update unified search results to match chunk viz display
+- **astrolabe**: handle OAuth refresh token rotation
+- address critical code review issues (4 fixes)
+- resolve CI linting issues for Astroglobe
+
+### Refactor
+
+- **astrolabe**: extract PDF viewer to dedicated component
+- **astrolabe**: reframe UI as semantic search service
+
+## v0.52.1 (2025-12-13)
+
+### Perf
+
+- **deck**: optimize card lookup by storing board_id/stack_id in metadata
+
+## v0.52.0 (2025-12-13)
+
+### Feat
+
+- **vector**: add Deck card vector search with visualization support
+
+## v0.51.0 (2025-12-13)
+
+### Feat
+
+- **vector-viz**: add news_item support for links and chunk expansion
+
+## v0.50.2 (2025-12-13)
+
+### Fix
+
+- **news**: revert get_item() to use get_items() + filter
+
+## v0.50.1 (2025-12-12)
+
+### Fix
+
+- Disable DNS rebinding protection for containerized deployments
+- **deps**: update dependency mcp to >=1.23,<1.24
+
+## v0.50.0 (2025-12-11)
+
+### Feat
+
+- add MCP tool annotations for enhanced UX
+
+### Fix
+
+- address PR review feedback
+
+## v0.49.2 (2025-12-09)
+
+### Fix
+
+- Update lockfile
+
+## v0.49.1 (2025-12-09)
+
+### Fix
+
+- Revert mcp version <1.23
+
+## v0.49.0 (2025-12-08)
+
+### Feat
+
+- **news**: add Nextcloud News app integration
+
+### Fix
+
+- resolve all type checking errors (8 errors fixed)
+
+### Refactor
+
+- **news**: simplify vector sync to fetch all items
+
+### Perf
+
+- **news**: use direct API endpoint for get_item()
+
+## v0.48.6 (2025-12-03)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.23,<1.24
+
+## v0.48.5 (2025-11-28)
+
+### Fix
+
+- **deps**: update dependency pillow to v12
+
+## v0.48.4 (2025-11-23)
+
+### Fix
+
+- Add rate limit retry logic to OpenAI provider
+
+## v0.48.3 (2025-11-23)
+
+### Fix
+
+- Increase MCP sampling timeout to 5 minutes for slower LLMs
+
+## v0.48.2 (2025-11-23)
+
+### Fix
+
+- Share vector sync state with FastMCP session lifespan via module singleton
+- Share vector sync state with FastMCP session lifespan via module singleton
+
+## v0.48.1 (2025-11-23)
+
+### Fix
+
+- Use WebDAV for tag creation and add LLM-as-a-judge for RAG tests
+
+### Refactor
+
+- Move background tasks to server lifespan and deprecate SSE transport
+
+## v0.48.0 (2025-11-23)
+
+### Feat
+
+- Add tag management methods to WebDAV client
+
+## v0.47.0 (2025-11-23)
+
+### Feat
+
+- Add OpenAI provider support for embeddings and generation
+
+## v0.46.2 (2025-11-22)
+
+### Fix
+
+- **smithery**: Enable JSON response format for scanner compatibility
+
+## v0.46.1 (2025-11-22)
+
+### Perf
+
+- Optimize vector viz search performance
+
+## v0.46.0 (2025-11-22)
+
+### Feat
+
+- Add Smithery CLI deployment support
+- Implement ADR-016 Smithery stateless deployment mode
+
+### Fix
+
+- **smithery**: Add JSON Schema metadata to mcp-config endpoint
+- **smithery**: Use container runtime pattern for config discovery
+- Add Smithery lifespan and auth mode detection
+
+## v0.45.0 (2025-11-22)
+
+### Feat
+
+- Add context expansion to semantic search with chunk overlap removal
+- Use Ollama native batch API in embed_batch()
+- Implement Qdrant placeholder state management
+- Switch files to use numeric IDs with file_path resolution
+- Implement per-chunk vector visualization with context expansion
+
+### Fix
+
+- Use alpha_composite for proper RGBA highlight blending
+- Remove pymupdf.layout.activate() to fix page_chunks behavior
+- Centralize PDF processing and generate separate images per chunk
+- Set is_placeholder=False in processor to fix search filtering
+- Increase placeholder staleness threshold to 5x scan interval
+- Add placeholder staleness check to prevent duplicate processing
+- Use empty SparseVector instead of None for placeholders
+- Return empty array instead of null for query_coords when no results
+- Align PDF text extraction between indexing and context expansion
+- Update models and viz to use int-only doc_id
+- Reconstruct full content for notes to match indexed offsets
+- Add async/await, PDF metadata, and type safety fixes
+
+### Refactor
+
+- Simplify PDF text extraction with single to_markdown call
+
+### Perf
+
+- Optimize PDF processing with parallel extraction and single-render highlights
+
+## v0.44.1 (2025-11-21)
+
+### Fix
+
+- **deps**: update dependency mcp to >=1.22,<1.23
+
+## v0.44.0 (2025-11-19)
+
+### Feat
+
+- Improve vector visualization with static assets and fixes
+- Redesign UI to match Nextcloud ecosystem aesthetic
+
+### Fix
+
+- Improve 3D plot rendering with explicit dimensions and window resize support
+- Preserve 3D plot camera and improve documentation
+- Preserve 3D plot camera position and fix CSS loading
+
+## v0.43.0 (2025-11-18)
+
+### Feat
+
+- Replace custom document chunker with LangChain MarkdownTextSplitter
+
+## v0.42.0 (2025-11-17)
+
+### Feat
+
+- **viz**: Add dual-score display and improve UI controls
+
+## v0.41.0 (2025-11-17)
+
+### Feat
+
+- add configurable fusion algorithms for BM25 hybrid search
+- add chunk position tracking to vector indexing and search
+- add vector viz template and chunk context endpoint
+
+### Fix
+
+- prevent infinite loop in DocumentChunker with position tracking
+- Relax SearchResult validation to support DBSF fusion scores > 1.0
+
+## v0.40.0 (2025-11-16)
+
+### Feat
+
+- add unified provider architecture with Amazon Bedrock support
+
+### Fix
+
+- suppress Starlette middleware type warnings in ty checker
+
+## v0.39.0 (2025-11-16)
+
+### Feat
+
+- Implement BM25 hybrid search with native Qdrant RRF fusion
+
+### Fix
+
+- Handle named vectors in visualization and semantic search
+- Update vizApp to use bm25_hybrid algorithm and remove deprecated weights
+- Update viz routes to use BM25 hybrid search after refactor
+
+## v0.38.0 (2025-11-16)
+
+### Feat
+
+- add concurrent uploads and --force flag to upload command
+- implement RAG evaluation framework with CLI tooling
+
+### Fix
+
+- download qrels from BEIR ZIP instead of HuggingFace
+
+### Refactor
+
+- migrate asyncio to anyio for consistent structured concurrency
+- replace httpx client with NextcloudClient in upload command
+
+### Perf
+
+- Eliminate double-fetching in semantic search sampling
+- fix vector viz search performance and visual encoding
+- make note deletion concurrent in upload --force
+
+## v0.37.0 (2025-11-16)
+
+### Feat
+
+- Add OpenTelemetry tracing to @instrument_tool decorator
+
+## v0.36.0 (2025-11-15)
+
+### BREAKING CHANGE
+
+- Search algorithms now require Qdrant to be populated.
+Vector sync must be enabled and documents indexed for search to work.
+
+### Feat
+
+- Normalize hybrid search RRF scores to 0-1 range
+- Enhance vector visualization UI and parallelize search verification
+- Add Vector Viz tab to app home page
+- Add vector visualization pane with multi-select document types
+- Implement custom PCA to remove sklearn dependency
+- Add multi-document Protocol with cross-app search support
+- Update nc_semantic_search tool with algorithm selection
+- Implement unified search algorithm module
+
+### Fix
+
+- Reorder tabs and fix viz pane session access
+
+### Refactor
+
+- Optimize Nextcloud access verification with centralized filtering
+- Make all search algorithms query Qdrant payload, not Nextcloud
+
+### Perf
+
+- Exclude vector-sync status polling from distributed tracing
+
+## v0.35.0 (2025-11-15)
+
+### Feat
+
+- Enable SSE transport for mcp service and update test fixtures
+
+## v0.34.2 (2025-11-13)
+
+### Fix
+
+- Use NEXTCLOUD_OIDC_CLIENT_ID/SECRET env vars consistently
+
+## v0.34.1 (2025-11-13)
+
+### Fix
+
+- return all notes when search query is empty
+
+## v0.34.0 (2025-11-13)
+
+### Feat
+
+- Complete Phase 5 - Instrument all 93 MCP tools
+- Add instrumentation decorator and apply to notes tools (Phase 5)
+- Add OAuth token and database metrics (Phases 3-4)
+- Add metrics instrumentation for queue, health, and database operations
+
+## v0.33.1 (2025-11-13)
+
+### Fix
+
+- Move grafana_folder from labels to annotations
+
+## v0.33.0 (2025-11-13)
+
+### Feat
+
+- Add Grafana dashboard and vector sync metric instrumentation
+
+## v0.32.1 (2025-11-12)
+
+### Fix
+
+- add dynamic dimension detection for Ollama embedding models
+
+## v0.32.0 (2025-11-11)
+
+### Feat
+
+- **ollama**: Pull model on startup if not available in ollama
+- add dynamic vector sync status updates with htmx polling
+- add webhook management UI and BeforeNodeDeletedEvent support
+- validate Nextcloud webhook schemas and document findings
+
+### Fix
+
+- improve webapp tab UI with CSS Grid and viewport-filling container
+
+### Refactor
+
+- move webapp from /user/page to /app
+- consolidate database storage for webhooks and OAuth tokens
+
 ## v0.31.1 (2025-11-10)

 ### Refactor
@@ -5,23 +5,29 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
 ## Coding Conventions

 ### async/await Patterns
- **Use anyio + asyncio hybrid** - Both libraries are available
+- **Use anyio for all async operations** - Provides structured concurrency
  - pytest runs in `anyio` mode (`anyio_mode = "auto"` in pyproject.toml)
-  - asyncio used in auth modules (refresh_token_storage.py, token_exchange.py, token_broker.py)
-  - anyio used in calendar.py, client_registration.py, app.py
+  - Use `anyio.create_task_group()` for concurrent execution (NOT `asyncio.gather()`)
+  - Use `anyio.Lock()` for synchronization primitives (NOT `asyncio.Lock()`)
+  - Use `anyio.run()` for entry points (NOT `asyncio.run()`)
  - Prefer standard async/await syntax without explicit library imports when possible
+  - Examples: app.py, search/hybrid.py, search/verification.py, auth/token_broker.py

 ### Type Hints
 - **Use Python 3.10+ union syntax**: `str | None` instead of `Optional[str]`
 - **Use lowercase generics**: `dict[str, Any]` instead of `Dict[str, Any]`
 - **Type all function signatures** - Parameters and return types
- **No explicit type checker configured** - Ruff handles linting only
+- **Type checker**: `ty` is configured for static type checking
+  ```bash
+  uv run ty check -- nextcloud_mcp_server
+  ```

 ### Code Quality
- **Run ruff before committing**:
+- **Run ruff and ty before committing**:
  ```bash
  uv run ruff check
  uv run ruff format
+  uv run ty check -- nextcloud_mcp_server
  ```
 - **Ruff configuration** in pyproject.toml (extends select: ["I"] for import sorting)

@@ -50,13 +56,127 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
  - Pass-through (default): Simple, stateless (ENABLE_TOKEN_EXCHANGE=false)
  - Token exchange (opt-in): RFC 8693 delegation (ENABLE_TOKEN_EXCHANGE=true)

+### MCP Tool Annotations (ADR-017)
+
+**All tools MUST include annotations** following these patterns:
+
+```python
+from mcp.types import ToolAnnotations
+
+# Read-only tools (list, search, get)
+@mcp.tool(
+    title="Human Readable Name",
+    annotations=ToolAnnotations(
+        readOnlyHint=True,
+        openWorldHint=True,  # Nextcloud is external to MCP server
+    ),
+)
+
+# Create operations
+@mcp.tool(
+    title="Create Resource",
+    annotations=ToolAnnotations(
+        idempotentHint=False,  # Creates new resources each time
+        openWorldHint=True,
+    ),
+)
+
+# Update operations (with etag/version control)
+@mcp.tool(
+    title="Update Resource",
+    annotations=ToolAnnotations(
+        idempotentHint=False,  # ETag changes = different inputs
+        openWorldHint=True,
+    ),
+)
+
+# Delete operations
+@mcp.tool(
+    title="Delete Resource",
+    annotations=ToolAnnotations(
+        destructiveHint=True,   # Permanently deletes data
+        idempotentHint=True,    # Same end state if called repeatedly
+        openWorldHint=True,
+    ),
+)
+
+# HTTP PUT without version control (special case)
+@mcp.tool(
+    title="Write File",
+    annotations=ToolAnnotations(
+        idempotentHint=True,  # Same content = same end state
+        openWorldHint=True,
+    ),
+)
+```
+
+**Key Principles**:
+- **Idempotency**: Same inputs → same result. ETags change after updates, making them non-idempotent
+- **Destructive**: Operations that permanently delete/overwrite data
+- **Open World**: All Nextcloud tools access external service (openWorldHint=True)
+- **Titles**: Use human-readable names, not snake_case function names
+
+**See**: `docs/ADR-017-mcp-tool-annotations.md` for detailed rationale and examples
+
 ### Project Structure
 - `nextcloud_mcp_server/client/` - HTTP clients for Nextcloud APIs
 - `nextcloud_mcp_server/server/` - MCP tool/resource definitions
 - `nextcloud_mcp_server/auth/` - OAuth/OIDC authentication
 - `nextcloud_mcp_server/models/` - Pydantic response models
+- `nextcloud_mcp_server/providers/` - Unified LLM provider infrastructure (embeddings + generation)
 - `tests/` - Layered test suite (unit, smoke, integration, load)

+### Provider Architecture (ADR-015)
+
+**Unified Provider System** for embeddings and text generation:
+
+**Location:** `nextcloud_mcp_server/providers/`
+- `base.py` - `Provider` ABC with optional capabilities
+- `registry.py` - Auto-detection and factory pattern
+- `ollama.py` - Ollama provider (embeddings + generation)
+- `anthropic.py` - Anthropic provider (generation only)
+- `bedrock.py` - Amazon Bedrock provider (embeddings + generation)
+- `simple.py` - Simple in-memory provider (embeddings only, fallback)
+
+**Usage:**
+```python
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Auto-detects from environment
+
+# Check capabilities
+if provider.supports_embeddings:
+    embeddings = await provider.embed_batch(texts)
+
+if provider.supports_generation:
+    text = await provider.generate("prompt", max_tokens=500)
+```
+
+**Environment Variables:**
+
+Bedrock:
+- `AWS_REGION` - AWS region (e.g., "us-east-1")
+- `BEDROCK_EMBEDDING_MODEL` - Embedding model ID (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL` - Generation model ID (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+- `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY` - Optional, uses AWS credential chain
+
+Ollama:
+- `OLLAMA_BASE_URL` - API URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL` - Embedding model (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL` - Generation model (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL` - SSL verification (default: "true")
+
+Simple (fallback, no config needed):
+- `SIMPLE_EMBEDDING_DIMENSION` - Dimension (default: 384)
+
+**Auto-Detection Priority:** Bedrock → Ollama → Simple
+
+**Backward Compatibility:**
+- Old code using `nextcloud_mcp_server.embedding.get_embedding_service()` still works
+- `EmbeddingService` now wraps `get_provider()` internally
+
+**For Details:** See `docs/ADR-015-unified-provider-architecture.md`
+
 ## Development Commands (Quick Reference)

 ### Testing
@@ -119,6 +239,25 @@ uv run python -m tests.load.benchmark --output results.json --verbose

 **Credentials**: root/password, nextcloud/password, database: `nextcloud`

+### Quick Query Script (Recommended for Agents)
+
+Use `scripts/dbquery.py` for single SQL statements without requiring approval for each `docker compose exec`:
+
+```bash
+# Basic query
+./scripts/dbquery.py "SELECT COUNT(*) FROM oc_users"
+
+# Vertical output (one column per line) - useful for wide tables
+./scripts/dbquery.py -E "SELECT * FROM oc_oidc_clients LIMIT 1"
+
+# With different credentials
+./scripts/dbquery.py -u nextcloud -p nextcloud "SHOW TABLES"
+```
+
+### Direct Docker Access
+
+For interactive sessions or complex operations:
+
 ```bash
 # Connect to database
 docker compose exec db mariadb -u root -ppassword nextcloud
@@ -144,6 +283,40 @@ docker compose exec db mariadb -u root -ppassword nextcloud -e \
 - `oc_oidc_registration_tokens` - RFC 7592 registration tokens
 - `oc_oidc_redirect_uris` - Redirect URIs

+### SQLite Databases (MCP Services)
+
+Use `scripts/sqlitequery.py` to query SQLite databases in MCP service containers:
+
+```bash
+# List tables
+./scripts/sqlitequery.py ".tables"
+
+# Query specific service
+./scripts/sqlitequery.py -s oauth "SELECT * FROM refresh_tokens"
+./scripts/sqlitequery.py -s keycloak "SELECT * FROM oauth_clients"
+./scripts/sqlitequery.py -s basic "SELECT * FROM app_passwords"
+
+# With column headers
+./scripts/sqlitequery.py --column "SELECT * FROM audit_logs LIMIT 5"
+
+# JSON output
+./scripts/sqlitequery.py --json "SELECT * FROM oauth_sessions"
+
+# View schema
+./scripts/sqlitequery.py -s oauth ".schema refresh_tokens"
+```
+
+**Services**: `mcp` (default), `oauth`, `keycloak`, `basic`
+
+**SQLite Tables**:
+- `refresh_tokens` - OAuth refresh tokens with user profiles
+- `audit_logs` - Security audit trail
+- `oauth_clients` - DCR OAuth client credentials
+- `oauth_sessions` - OAuth flow session state
+- `registered_webhooks` - Webhook registrations
+- `app_passwords` - Multi-user BasicAuth passwords
+- `alembic_version` - Migration tracking
+
 ## Architecture Quick Reference

 **For detailed architecture, see:**
@@ -386,6 +559,29 @@ docker compose exec app php occ user_oidc:provider keycloak
 **Nextcloud**: `docker compose exec app php occ ...` for occ commands
 **MariaDB**: `docker compose exec db mariadb -u [user] -p [password] [database]` for queries

+### Querying Nextcloud Application Logs
+
+**Use this pattern** to inspect Nextcloud application logs during debugging:
+
+```bash
+# View recent log entries
+docker compose exec app cat /var/www/html/data/nextcloud.log | jq | tail
+
+# Filter by app
+docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.app == "astrolabe")' | tail
+
+# Filter by log level (0=DEBUG, 1=INFO, 2=WARN, 3=ERROR, 4=FATAL)
+docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.level >= 3)' | tail
+
+# Search for specific messages
+docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.message | contains("OAuth"))' | tail -20
+
+# View full exception traces
+docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.exception != null)' | tail -5
+```
+
+**Log Structure**: Each entry is a JSON object with fields: `reqId`, `level`, `time`, `remoteAddr`, `user`, `app`, `method`, `url`, `message`, `userAgent`, `version`, `exception`
+
 **For detailed setup, see**:
 - `docs/installation.md` - Installation guide
 - `docs/configuration.md` - Configuration options
@@ -0,0 +1,106 @@
+# Contributing to Nextcloud MCP Server
+
+## Version Management
+
+This monorepo uses commitizen for version management with **independent versioning** for two components:
+
+### Components
+
+| Component | Scope | Bump Command | Tag Example |
+|-----------|-------|--------------|-------------|
+| MCP Server | `mcp` or none | `./scripts/bump-mcp.sh` | `v0.54.0` |
+| Helm Chart | `helm` | `./scripts/bump-helm.sh` | `nextcloud-mcp-server-0.54.0` |
+
+> **Note:** The Astrolabe Nextcloud app has been moved to its own repository at [cbcoutinho/astrolabe](https://github.com/cbcoutinho/astrolabe).
+
+### Commit Message Format
+
+Use conventional commits with **scopes** to target specific components:
+
+```bash
+# MCP server changes
+feat(mcp): add calendar sync API
+fix(mcp): resolve authentication bug
+
+# Helm chart changes
+feat(helm): add resource limits
+docs(helm): update values documentation
+```
+
+**Unscoped commits** default to the MCP server:
+```bash
+feat: add new feature  # → MCP server (v0.54.0)
+```
+
+### Release Workflow
+
+#### 1. Make Changes with Scoped Commits
+
+```bash
+git commit -m "feat(helm): add ingress annotations"
+git commit -m "feat(mcp): add calendar sync"
+```
+
+#### 2. Bump Component Versions
+
+```bash
+# Bump MCP server (reads commits with scope=mcp or unscoped)
+./scripts/bump-mcp.sh
+# → Creates tag: v0.54.0
+# → Updates: pyproject.toml, Chart.yaml:appVersion
+
+# Bump Helm chart (reads commits with scope=helm)
+./scripts/bump-helm.sh
+# → Creates tag: nextcloud-mcp-server-0.54.0
+# → Updates: Chart.yaml:version
+
+```
+
+#### 3. Push Tags
+
+```bash
+git push --follow-tags
+```
+
+### Changelog Filtering
+
+Each component maintains its own `CHANGELOG.md`:
+
+- **MCP Server**: `CHANGELOG.md` (root) - includes `feat(mcp):` and unscoped commits
+- **Helm Chart**: `charts/nextcloud-mcp-server/CHANGELOG.md` - includes `feat(helm):` only
+
+### Manual Version Bumps
+
+For specific increments:
+
+```bash
+# Patch bump (0.53.0 → 0.53.1)
+uv run cz bump --increment PATCH
+
+# Minor bump (0.53.0 → 0.54.0)
+uv run cz bump --increment MINOR
+
+# Major bump (0.53.0 → 1.0.0)
+uv run cz bump --increment MAJOR
+
+# For non-MCP components, use --config
+cd charts/nextcloud-mcp-server
+uv run cz --config .cz.toml bump --increment MINOR
+```
+
+### Versioning Philosophy
+
+- **MCP Server**: Follows PEP 440, `major_version_zero = true` (0.x.x for pre-1.0)
+- **Helm Chart**: Follows PEP 440, starts at 0.53.0 (continues from current)
+
+### Chart.yaml Version vs appVersion
+
+The Helm chart has TWO version fields:
+
+- **`version`**: Chart packaging version (bumped by `feat(helm):`)
+  - Example: `0.53.0` → `0.54.0` when adding resource limits
+
+- **`appVersion`**: MCP server version being deployed (bumped by `feat(mcp):`)
+  - Example: `"0.53.0"` → `"0.54.0"` when MCP server releases
+
+This allows the chart to evolve independently from the application.
@@ -1,17 +1,28 @@
-FROM ghcr.io/astral-sh/uv:0.9.8-python3.11-alpine@sha256:6c842c49ad032f46b62f32a7e7779f45f12671a8e0d82ea24c766ab62d58b396
+FROM docker.io/library/python:3.12-slim-trixie@sha256:f3fa41d74a768c2fce8016b98c191ae8c1bacd8f1152870a3f9f87d350920b7c
+
+COPY --from=ghcr.io/astral-sh/uv:0.10.7@sha256:edd1fd89f3e5b005814cc8f777610445d7b7e3ed05361f9ddfae67bebfe8456a /uv /uvx /bin/

 # Install dependencies
 # 1. git (required for caldav dependency from git)
 # 2. sqlite for development with token db
-RUN apk add --no-cache git sqlite
+RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
+    git \
+    tesseract-ocr \
+    sqlite3 && apt clean

 WORKDIR /app

+COPY pyproject.toml uv.lock README.md .
+
+RUN uv sync --locked --no-dev --no-install-project --no-cache
+
 COPY . .

-RUN uv sync --locked --no-dev --no-editable
+RUN uv sync --locked --no-dev --no-editable --no-cache

 ENV PYTHONUNBUFFERED=1
 ENV VIRTUAL_ENV=/app/.venv
+ENV PATH=/app/.venv/bin:$PATH
+ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata

-ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
+ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "run", "--host", "0.0.0.0"]
@@ -0,0 +1,44 @@
+# Dockerfile for Smithery stateless deployment
+# ADR-016: Stateless mode for multi-user public Nextcloud instances
+#
+# This image excludes:
+# - Vector database dependencies (qdrant-client)
+# - Background sync workers
+# - Admin UI routes (/app)
+# - Semantic search tools
+#
+# Features included:
+# - Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
+# - Per-session app password authentication
+# - Multi-user support via Smithery session config
+
+FROM docker.io/library/python:3.12-slim-trixie@sha256:f3fa41d74a768c2fce8016b98c191ae8c1bacd8f1152870a3f9f87d350920b7c
+
+WORKDIR /app
+
+# Install uv for fast dependency management
+COPY --from=ghcr.io/astral-sh/uv:0.10.7@sha256:edd1fd89f3e5b005814cc8f777610445d7b7e3ed05361f9ddfae67bebfe8456a /uv /uvx /bin/
+
+# Install dependencies
+# 1. git (required for caldav dependency from git)
+# 2. sqlite for development with token db
+RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
+    git
+
+# Copy project files
+COPY . .
+
+RUN uv sync --locked --no-dev --no-editable --no-cache
+
+# Set Smithery mode environment variables
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081 by default
+EXPOSE 8081
+
+# Health check endpoint
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD uv run python -c "import httpx; httpx.get('http://localhost:${PORT:-8081}/health/live').raise_for_status()"
+
+CMD ["/app/.venv/bin/smithery-main"]
@@ -1,6 +1,11 @@
+<p align="center">
+  <img src="astrolabe.svg" alt="Nextcloud MCP Server" width="128" height="128">
+</p>
+
 # Nextcloud MCP Server

 [![Docker Image](https://img.shields.io/badge/docker-ghcr.io/cbcoutinho/nextcloud--mcp--server-blue)](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
+[![smithery badge](https://smithery.ai/badge/@cbcoutinho/nextcloud-mcp-server)](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)

 **A production-ready MCP server that connects AI assistants to your Nextcloud instance.**

@@ -13,7 +18,20 @@ This is a **dedicated standalone MCP server** designed for external MCP clients

 ## Quick Start

-Get up and running in 60 seconds using Docker:
+The fastest way to get started is via [Smithery](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server) - no Docker or self-hosting required:
+
+1. Visit the [Smithery marketplace page](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
+2. Click "Deploy" and configure:
+   - **Nextcloud URL**: Your Nextcloud instance (e.g., `https://cloud.example.com`)
+   - **Username**: Your Nextcloud username
+   - **App Password**: Generate one in Nextcloud → Settings → Security → Devices & sessions
+
+> [!NOTE]
+> Smithery runs in stateless mode without semantic search. For full features, use [Docker](#docker-self-hosted) or see [ADR-016](docs/ADR-016-smithery-stateless-deployment.md).
+
+## Docker (Self-Hosted)
+
+For full features including semantic search, run with Docker:

 ```bash
 # 1. Create a minimal configuration
@@ -29,10 +47,24 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \

 # 3. Test the connection
 curl http://127.0.0.1:8000/health/ready
+
+# 4. Connect to the endpoint
+http://127.0.0.1:8000/sse
+
+# Or with --transport streamable-http
+http://127.0.0.1:8000/mcp
+```
+
+**Docker Compose Profiles** (for development/testing):
+
+```bash
+docker compose --profile single-user up -d       # Port 8000
+docker compose --profile multi-user-basic up -d   # Port 8003
+docker compose --profile oauth up -d              # Port 8001
+docker compose --profile login-flow up -d         # Port 8004
 ```

 **Next Steps:**
- Create an app password in Nextcloud: Settings → Security → Devices & sessions
 - Connect your MCP client (Claude Desktop, IDEs, `mcp dev`, etc.)
 - See [docs/installation.md](docs/installation.md) for other deployment options (local, Kubernetes)

@@ -40,7 +72,7 @@ curl http://127.0.0.1:8000/health/ready

 - **90+ MCP Tools** - Comprehensive API coverage across 8 Nextcloud apps
 - **MCP Resources** - Structured data URIs for browsing Nextcloud data
- **Semantic Search (Experimental)** - Optional vector-powered search for Notes (requires Qdrant + Ollama)
+- **Semantic Search (Experimental)** - Optional vector-powered search for Notes, Files, News items, and Deck cards (requires Qdrant + Ollama)
 - **Document Processing** - OCR and text extraction from PDFs, DOCX, images with progress notifications
 - **Flexible Deployment** - Docker, Kubernetes (Helm), VM, or local installation
 - **Production-Ready Auth** - Basic Auth with app passwords (recommended) or OAuth2/OIDC (experimental)
@@ -58,7 +90,7 @@ curl http://127.0.0.1:8000/health/ready
 | **Cookbook** | 13 | Recipe management, URL import (schema.org) |
 | **Tables** | 5 | Row operations on Nextcloud Tables |
 | **Sharing** | 10+ | Create and manage shares |
-| **Semantic Search** | 2+ | Vector search for Notes (experimental, opt-in, requires infrastructure) |
+| **Semantic Search** | 2+ | Vector search for Notes, Files, News items, and Deck cards (experimental, opt-in, requires infrastructure) |

 Want to see another Nextcloud app supported? [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) or contribute a pull request!

@@ -76,19 +108,33 @@ Want to see another Nextcloud app supported? [Open an issue](https://github.com/

 ### Authentication Modes

-The server supports two authentication modes:
+The server supports four authentication modes:

-**Single-User Mode (BasicAuth):**
+**Single-User (BasicAuth):**
 - One set of credentials shared by all MCP clients
 - Simple setup: username + app password in environment variables
 - All clients access Nextcloud as the same user
 - Best for: Personal use, development, single-user deployments

-**Multi-User Mode (OAuth):**
+**Multi-User (BasicAuth Pass-Through):**
+- MCP clients send credentials via Authorization header
+- Server passes through to Nextcloud (stateless by default)
+- Optional offline access for background operations (`ENABLE_MULTI_USER_BASIC_AUTH=true`)
+- Best for: Multi-user setups without OAuth infrastructure
+
+**Multi-User (OAuth):**
 - Each MCP client authenticates separately with their own Nextcloud account
 - Per-user scopes and permissions (clients only see tools they're authorized for)
 - More secure: tokens expire, credentials never shared with server
 - Best for: Teams, multi-user deployments, production environments with multiple users
+- Requires: Patches to the `user_oidc` app (experimental)
+
+**Multi-User (Login Flow v2):**
+- Uses Nextcloud's native Login Flow v2 to obtain per-user app passwords
+- No OAuth patches required — works with stock Nextcloud
+- Each user authenticates via browser, server manages app passwords
+- Best for: Multi-user deployments without OAuth infrastructure (`ENABLE_LOGIN_FLOW=true`)
+- Experimental: See [ADR-022](docs/ADR-022-deployment-mode-consolidation.md) for details

 See [docs/authentication.md](docs/authentication.md) for detailed setup instructions.

@@ -104,7 +150,7 @@ This enables natural language queries and helps discover related content across

 > [!NOTE]
 > **Semantic Search is experimental and opt-in:**
-> - Disabled by default (`VECTOR_SYNC_ENABLED=false`)
+> - Disabled by default (`ENABLE_SEMANTIC_SEARCH=false`)
 > - Currently supports Notes app only (multi-app support planned)
 > - Requires additional infrastructure: vector database + embedding service
 > - Answer generation (`nc_semantic_search_answer`) requires MCP client sampling support
@@ -122,7 +168,8 @@ This enables natural language queries and helps discover related content across
 ### Features
 - **[App Documentation](docs/)** - Notes, Calendar, Contacts, WebDAV, Deck, Cookbook, Tables
 - **[Document Processing](docs/configuration.md#document-processing)** - OCR and text extraction setup
- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes only, opt-in)
+- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes, Files, News items, Deck cards; opt-in)
+- **[Vector Sync UI Guide](docs/user-guide/vector-sync-ui.md)** - Browser interface for semantic search visualization and testing

 ### Advanced Topics
 - **[OAuth Architecture](docs/oauth-architecture.md)** - How OAuth works (experimental)
@@ -0,0 +1,90 @@
+# Alembic configuration file for nextcloud-mcp-server
+
+[alembic]
+# Path to migration scripts
+script_location = nextcloud_mcp_server/alembic
+
+# Template used to generate migration file names
+# Default: %%(rev)s_%%(slug)s
+file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
+
+# Timezone for migration timestamps
+# Default: utc
+timezone = utc
+
+# Max length of characters to apply to the "slug" field
+# Default: 40
+# truncate_slug_length = 40
+
+# Set to 'true' to run the environment during the 'revision' command
+# Default: false
+# revision_environment = false
+
+# Set to 'true' to allow .pyc and .pyo files without a source .py file
+# Default: false
+# sourceless = false
+
+# Version location specification
+# Supports single or multiple directories
+version_locations = nextcloud_mcp_server/alembic/versions
+
+# Path separator for version locations (required to suppress deprecation warning)
+# Use os (for cross-platform compatibility)
+path_separator = os
+
+# Set to 'true' to search source files recursively in each "version_locations" directory
+# Default: false
+# recursive_version_locations = false
+
+# Output encoding used when revision files are written
+# Default: utf-8
+# output_encoding = utf-8
+
+# Database URL - can be overridden by:
+# 1. Passing -x database_url=... to alembic commands
+# 2. Setting in environment via get_database_url() in env.py
+# Default: sqlite:///app/data/tokens.db
+sqlalchemy.url = sqlite+aiosqlite:////app/data/tokens.db
+
+[post_write_hooks]
+# Post-write hooks allow you to run scripts after generating migration files
+# Example: format migrations with ruff
+# hooks = ruff
+# ruff.type = exec
+# ruff.executable = ruff
+# ruff.options = format REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
@@ -0,0 +1,71 @@
+Database Migrations for nextcloud-mcp-server
+============================================
+
+This directory contains Alembic database migrations for the token storage database.
+
+Structure
+---------
+- env.py: Alembic environment configuration
+- script.py.mako: Template for generating new migration files
+- versions/: Directory containing migration scripts
+
+Usage
+-----
+Migrations are managed via the CLI:
+
+    # Upgrade database to latest version
+    uv run nextcloud-mcp-server db upgrade
+
+    # Show current database version
+    uv run nextcloud-mcp-server db current
+
+    # Show migration history
+    uv run nextcloud-mcp-server db history
+
+    # Create a new migration (developers only)
+    uv run nextcloud-mcp-server db migrate "description of changes"
+
+    # Downgrade database by one version (emergency use only)
+    uv run nextcloud-mcp-server db downgrade
+
+Direct Alembic Usage
+--------------------
+You can also use Alembic commands directly:
+
+    # Specify database URL via -x flag
+    uv run alembic -x database_url=sqlite+aiosqlite:////path/to/tokens.db upgrade head
+
+    # Or set in alembic.ini and run
+    uv run alembic upgrade head
+    uv run alembic current
+    uv run alembic history
+
+Writing Migrations
+------------------
+Since we don't use SQLAlchemy models, migrations are written with raw SQL:
+
+    def upgrade() -> None:
+        op.execute("""
+            ALTER TABLE refresh_tokens
+            ADD COLUMN new_field TEXT
+        """)
+
+    def downgrade() -> None:
+        # SQLite doesn't support DROP COLUMN, use table recreation
+        op.execute("""
+            CREATE TABLE refresh_tokens_new AS
+            SELECT user_id, encrypted_token, ... FROM refresh_tokens
+        """)
+        op.execute("DROP TABLE refresh_tokens")
+        op.execute("ALTER TABLE refresh_tokens_new RENAME TO refresh_tokens")
+
+Migration File Naming
+---------------------
+Format: YYYYMMDD_HHMM_<revision>_<slug>.py
+Example: 20251217_2200_001_initial_schema.py
+
+Notes
+-----
+- Migrations run automatically when RefreshTokenStorage.initialize() is called
+- Existing databases are automatically stamped with the initial version
+- SQLite has limited ALTER TABLE support - complex changes require table recreation
@@ -0,0 +1,26 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    """Apply migration changes to upgrade the database schema."""
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    """Revert migration changes to downgrade the database schema."""
+    ${downgrades if downgrades else "pass"}
@@ -3,3 +3,9 @@
 set -euox pipefail

 php /var/www/html/occ config:system:set trusted_domains 2 --value=host.docker.internal
+
+# Set overwrite.cli.url to the external URL for OIDC discovery
+# This ensures OAuth flows redirect to the correct external URL
+# Important: The Astrolabe OAuth controller makes internal HTTP requests to /.well-known/openid-configuration
+# which needs to return URLs reachable by external browsers (localhost:8080, not localhost:80)
+php /var/www/html/occ config:system:set overwrite.cli.url --value="http://localhost:8080"
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -euox pipefail
+echo "Disabling bruteforce protection and rate limiting for dev/CI..."
+php /var/www/html/occ config:system:set auth.bruteforce.protection.enabled --value=false --type=boolean
+php /var/www/html/occ config:system:set ratelimit.protection.enabled --value=false --type=boolean
+echo "Bruteforce protection and rate limiting disabled."
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+set -euox pipefail
+
+php /var/www/html/occ app:enable news
@@ -2,4 +2,30 @@

 set -euox pipefail

-php /var/www/html/occ app:enable notes
+echo "Installing and configuring notes app for testing..."
+
+# Check if development notes app is mounted at /opt/apps/notes
+if [ -d /opt/apps/notes ]; then
+    echo "Development notes app found at /opt/apps/notes"
+
+    # Remove any existing notes app in apps (from app store or old symlink)
+    if [ -e /var/www/html/custom_apps/notes ]; then
+        echo "Removing existing notes in apps..."
+        rm -rf /var/www/html/custom_apps/notes
+    fi
+
+    # Create symlink from apps to the mounted development version
+    # Per Nextcloud docs: apps outside server root need symlinks in server root
+    echo "Creating symlink: custom_apps/notes -> /opt/apps/notes"
+    ln -sf /opt/apps/notes /var/www/html/custom_apps/notes
+
+    echo "Enabling notes app from /opt/apps (development mode via symlink)"
+    php /var/www/html/occ app:enable notes
+elif [ -d /var/www/html/custom_apps/notes ]; then
+    echo "notes app directory found in apps (already installed)"
+    php /var/www/html/occ app:enable notes
+else
+    echo "notes app not found, installing from app store..."
+    php /var/www/html/occ app:install notes
+    php /var/www/html/occ app:enable notes
+fi
@@ -13,6 +13,14 @@ echo "===================================================================="
 echo "Configuring user_oidc provider for Keycloak..."
 echo "===================================================================="

+# Quick check: Is keycloak service in the Docker network?
+# When the keycloak profile is not active, this hostname won't resolve.
+if ! getent hosts keycloak >/dev/null 2>&1; then
+    echo "  Keycloak service not detected in Docker network (profile not active)"
+    echo "  Skipping keycloak provider configuration"
+    exit 0
+fi
+
 # Wait for Keycloak to be ready and realm to be available
 echo "Waiting for Keycloak realm to be available..."
 MAX_RETRIES=30
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+set -euox pipefail
+
+echo "Installing Astrolabe app..."
+
+# Check if development astrolabe app is mounted at /opt/apps/astrolabe
+if [ -d /opt/apps/astrolabe ]; then
+    echo "Development astrolabe app found at /opt/apps/astrolabe"
+
+    # Remove any existing astrolabe app in custom_apps (from app store or old symlink)
+    if [ -e /var/www/html/custom_apps/astrolabe ]; then
+        echo "Removing existing astrolabe in custom_apps..."
+        rm -rf /var/www/html/custom_apps/astrolabe
+    fi
+
+    # Create symlink from custom_apps to the mounted development version
+    # Per Nextcloud docs: apps outside server root need symlinks in server root
+    echo "Creating symlink: custom_apps/astrolabe -> /opt/apps/astrolabe"
+    ln -sf /opt/apps/astrolabe /var/www/html/custom_apps/astrolabe
+
+    echo "Enabling astrolabe app from /opt/apps (development mode via symlink)"
+    php /var/www/html/occ app:enable astrolabe
+elif [ -d /var/www/html/custom_apps/astrolabe ]; then
+    echo "astrolabe app directory found in custom_apps (already installed)"
+    php /var/www/html/occ app:enable astrolabe
+else
+    echo "astrolabe app not found, installing from app store..."
+    php /var/www/html/occ app:install astrolabe
+    php /var/www/html/occ app:enable astrolabe
+fi
+
+echo "Astrolabe app installed successfully"
+echo ""
+echo "Note: MCP server configuration is managed dynamically during tests"
+echo "      to support testing multiple MCP server deployments."
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Configure MCP server URL for Astrolabe background sync
+# This URL is used by Astrolabe to send app passwords to the MCP server
+
+set -e
+
+if [ -z "${MCP_SERVER_URL:-}" ]; then
+  echo "MCP_SERVER_URL not set, skipping Astrolabe MCP server URL configuration"
+  exit 0
+fi
+
+echo "Configuring MCP server URL: $MCP_SERVER_URL"
+
+# Set the mcp_server_url in config.php via occ
+php occ config:system:set mcp_server_url --value="$MCP_SERVER_URL"
+
+echo "MCP server URL configured successfully"
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" width="512" height="512" viewBox="0 0 512 512">
+  <rect width="512" height="512" rx="80" ry="80" fill="#0082C9"/>
+  <path d="M255.9 21.04c-11.8 0-22.2 4.08-28.6 10.01-5.6 4.98-8.6 11.41-8.6 18.11 0 5.55 2.2 11.01 5.9 15.48-16.4 4.97-30.1 13.64-39 24.53 22.1-7.67 45.7-11.86 70.3-11.86 24.6 0 48.3 4.19 70.3 11.86-8.9-10.89-22.6-19.56-39-24.53 3.9-4.47 5.9-9.93 5.9-15.48 0-6.7-3-13.13-8.5-18.11-6.4-5.93-16.9-10.01-28.7-10.01zm0 20.34c5.3 0 10.1 1.27 13.6 3.52 1.7 1.16 3.4 2.43 3.4 4.27 0 1.76-1.7 3.03-3.4 4.19-3.5 2.33-8.3 3.61-13.6 3.61-5.3 0-10.1-1.28-13.6-3.61-1.6-1.16-3.3-2.43-3.3-4.19 0-1.84 1.7-3.11 3.3-4.27 3.5-2.25 8.3-3.52 13.6-3.52zm.1 48.1c-110.8 0-200.72 90.02-200.72 200.82S145.2 491 256 491s200.7-89.9 200.7-200.7c0-110.8-89.9-200.82-200.7-200.82zm0 32.62c92.9 0 168.2 75.3 168.2 168.2 0 92.8-75.3 168.2-168.2 168.2-92.9 0-168.26-75.4-168.26-168.2 0-92.9 75.36-168.2 168.26-168.2zm-8.2 6.3c-9.6.5-19 1.9-28.3 4.1l2.3 7.8c8.4-2 17.1-3.3 26-3.8v-8.1zm16.2 0v8.1c9 .5 17.7 1.8 26 3.8l2.2-7.8c-9.1-2.2-18.6-3.6-28.2-4.1zm-60 8.5c-9 3.2-17.6 7-25.8 11.6l4.1 7.1c7.7-4.3 15.6-7.9 23.9-10.8l-2.2-7.9zm103.7 0-2 7.9c8.4 2.9 16.2 6.5 23.8 10.8l4.2-7.1c-8.2-4.6-16.9-8.4-26-11.6zm-143.3 20.3c-7.5 5.4-14.6 11.4-21.1 17.9l5.8 5.8c5.9-6.1 12.5-11.7 19.5-16.6l-4.2-7.1zm182.9 0-4 7.1c6.9 4.9 13.5 10.5 19.5 16.6l5.7-5.8c-6.5-6.5-13.7-12.5-21.2-17.9zm-91.4 11.5c-37 0-67.4 28.6-70.3 64.9l15.9 4.7c.7-29.6 24.7-53.4 54.4-53.4 30.1 0 54.4 24.4 54.4 54.3 0 15-6.2 28.7-16 38.5l.1.1c1.7 2.7 3 5.6 4.1 8.6.9 3 1.7 5.7 2.3 8.6v.4c33.8-16.7 57.2-51.5 57.2-91.7 0-3.8-.2-7.3-.6-10.9-3.2-3.3-6.3-6.4-9.8-9.5 1.5 6.5 2.3 13.4 2.3 20.4 0 28.7-13 54.7-33.5 71.8 6.3-10.6 10.1-23 10.1-36.3 0-38.9-31.7-70.5-70.6-70.5zm-91.8 14.6c-3.3 3.1-6.5 6.2-9.7 9.5-.3 3.6-.5 7.1-.5 10.9 0 7.3.7 14.2 2.1 20.9l9.1 2.7c-2.1-7.5-3.1-15.4-3.1-23.6 0-7 .7-13.9 2.1-20.4zm-31.6 4c-5.8 7.1-10.9 14.6-15.4 22.6l7.1 4c4.1-7.4 8.8-14.3 14-20.8l-5.7-5.8zm246.8 0-5.7 5.8c5.3 6.5 10 13.4 13.9 20.8l7.1-4c-4.4-8-9.5-15.5-15.3-22.6zm-269.2 37.1c-2.5 5.7-4.6 11.4-6.4 17.6l.1-.3c3.4-5 7.9-9.3 12.9-12.5l.3-.6-6.9-4.2zm291.8 0-7.2 4.2c3.2 7.3 5.7 15.1 7.6 23.1l7.9-2.1c-2.1-8.8-4.9-17.3-8.3-25.2zm-261.2 11.5c-13.4.1-25.7 9-29.7 22.5l114.8 34.2c-4.9 16.7 4.6 34.2 21.2 39.2L361.7 366c16.6 5 34.1-4.4 39.1-21l-114.6-34.4c4.9-16.5-4.7-34.1-21.3-39.1 0 0-72.4-21.5-114.8-34.3-3.1-.9-6.3-1.4-9.4-1.3zm-42.09 29.7c-.9 6.9-1.4 14-1.4 21.3 0 1.3.1 2.9.1 4.2h8.09v-4.2c0-6.5.4-12.9 1.2-19.2l-7.99-2.1zm314.59 0-7.9 2.1c.7 6.3 1.3 12.7 1.3 19.2 0 1.3 0 2.9-.2 4.2h8.2v-4.2c0-7.3-.5-14.4-1.4-21.3zm-157.3 24.7c6.3 0 11.5 5 11.5 11.3 0 6.4-5.2 11.6-11.5 11.6s-11.5-5.2-11.5-11.6c0-6.3 5.2-11.3 11.5-11.3zM98.51 307.4c1 8.2 2.89 16.4 5.09 24.3l7.9-2.1c-2.1-7.2-3.8-14.6-4.8-22.2h-8.19zm306.69 0c-1.1 7.6-2.7 15-4.8 22.2l7.8 2.1c2.2-7.9 4.1-16.1 5.2-24.3h-8.2zm-191.3 10.9c-19 13.3-31.4 35.3-31.4 60.1 0 10.4 2.3 20.4 6.2 29.7 8.8 4.9 17.9 8.8 27.6 11.7-10.8-10.7-17.5-25.2-17.5-41.4 0-19 9.3-36 23.7-46.3-3.8-4.1-6.7-8.7-8.6-13.8zM116.8 345l-7.9 2c3.1 7.6 6.8 14.7 11 21.6l6.9-4.2c-3.8-6.2-7-12.8-10-19.4zm194.8 20.5c.9 4.1 1.4 8.5 1.4 12.9 0 16.2-6.7 30.7-17.4 41.4 9.6-2.9 18.8-6.8 27.5-11.7 4-9.3 6.2-19.3 6.2-29.7 0-2.7-.2-5.2-.4-7.7l-17.3-5.2zM136 377.9l-7.1 4.1c4.7 6.2 9.7 12.1 15.3 17.3l5.7-5.5c-5.1-5-9.7-10.3-13.9-15.9zm243.9 2.3-.2.1c-2.1.3-4 .6-6.2.7h-.1c-3.6 4.5-7.3 8.8-11.5 12.8l5.8 5.5c5.5-5.2 10.5-11.1 15.2-17.3l-3-1.8zm-217.8 24-5.9 5.9c6 4.8 12.2 9.7 18.8 13.6l3.8-7.8c-5.7-2.9-11.4-6.8-16.7-11.7zm187.7 0c-5.4 4.9-11.1 8.8-16.8 11.7l3.9 7.8c6.5-3.9 12.8-8.8 18.7-13.6l-5.8-5.9zm-156.4 19.5-4.1 6.8c6.6 4 13.7 5.8 20.7 8.8l2.2-7.9c-6.5-1.9-12.7-4.8-18.8-7.7zm125.2 0c-6.2 2.9-12.5 5.8-19.1 7.7l2.3 7.9c7.2-3 14-4.8 20.7-8.8l-3.9-6.8zm-90.7 11.7-2 7.8c7.1 1 14.5 1.9 21.9 1.9v-7.7c-6.8 0-13.5-1.1-19.9-2zm55.9 0c-6.3.9-13 2-19.8 2v7.7c7.5 0 14.8-.9 22.1-1.9l-2.3-7.8z" fill="#fff"/>
+</svg>
@@ -0,0 +1,25 @@
+[tool.commitizen]
+name = "cz_conventional_commits"
+version = "0.58.3"
+tag_format = "nextcloud-mcp-server-$version"
+version_scheme = "semver"
+update_changelog_on_bump = true
+major_version_zero = true
+
+# Update chart version only (NOT appVersion)
+version_files = [
+    "Chart.yaml:^version:"
+]
+
+# Ignore tags from other components
+ignored_tag_formats = [
+    "v*",              # MCP server tags
+    "astrolabe-v*",    # Astrolabe tags
+]
+
+# Filter commits by scope
+# Includes helm-scoped commits AND MCP server version bumps (which update appVersion)
+[tool.commitizen.customize]
+changelog_pattern = "^((feat|fix|docs|refactor|perf|test|build|ci|chore)\\(helm\\)(!)?:|bump: version.*→.*)"
+schema_pattern = "^(feat|fix|docs|refactor|perf|test|build|ci|chore)\\(helm\\)(!)?:\\s.+"
+message_template = "{{change_type}}(helm): {{message}}"
@@ -1,9 +1,9 @@
 dependencies:
 - name: qdrant
  repository: https://qdrant.github.io/qdrant-helm
-  version: 1.15.5
+  version: 1.17.0
 - name: ollama
  repository: https://otwld.github.io/ollama-helm
-  version: 1.34.0
-digest: sha256:d51c97d05be2614b751c0dd7267ef7dc959eff5ebef859c5f895c5c554b7a874
-generated: "2025-11-09T17:08:02.86648061Z"
+  version: 1.47.0
+digest: sha256:08d589dd1b3386e8e8a2ac2c03a2194218ab12ed9e02016e7b981e554385dd11
+generated: "2026-03-02T11:15:27.688786078Z"
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.31.1
-appVersion: "0.31.1"
+version: 0.58.3
+appVersion: "0.65.0"
 keywords:
  - nextcloud
  - mcp
@@ -21,12 +21,25 @@ home: https://github.com/cbcoutinho/nextcloud-mcp-server
 sources:
  - https://github.com/cbcoutinho/nextcloud-mcp-server
 icon: https://raw.githubusercontent.com/nextcloud/server/master/core/img/logo/logo.svg
+annotations:
+  # Grafana dashboard support
+  grafana_dashboard: "true"
+  grafana_dashboard_folder: "Nextcloud MCP"
+  artifacthub.io/changes: |
+    - kind: added
+      description: Login Flow v2 auth mode for Helm chart (ADR-022)
+    - kind: added
+      description: Multi-user BasicAuth guidance in post-install NOTES
+    - kind: added
+      description: Version and changelog info in post-install NOTES
+    - kind: changed
+      description: Updated appVersion to 0.64.4
 dependencies:
  - name: qdrant
-    version: "1.15.5"
+    version: "1.17.0"
    repository: https://qdrant.github.io/qdrant-helm
    condition: qdrant.networkMode.deploySubchart
  - name: ollama
-    version: "1.34.0"
+    version: "1.47.0"
    repository: https://otwld.github.io/ollama-helm
    condition: ollama.enabled
@@ -99,11 +99,11 @@ ingress:
 |-----------|-------------|---------|
 | `nextcloud.host` | URL of your Nextcloud instance (required) | `""` |
 | `nextcloud.mcpServerUrl` | MCP server URL for OAuth callbacks (OAuth only, optional) | Smart default* |
-| `nextcloud.publicIssuerUrl` | Public issuer URL for OAuth (OAuth only, optional) | Smart default** |
+| `nextcloud.publicIssuerUrl` | Public URL for browser-accessible OAuth authorization endpoint (OAuth only, optional) | Smart default** |

 **Smart Defaults:**
 - `*mcpServerUrl`: If not set, automatically uses ingress host (if enabled) or `http://localhost:8000` (for port-forward setups)
- `**publicIssuerUrl`: If not set, automatically defaults to `nextcloud.host` (which works when both clients and MCP server access Nextcloud at the same URL)
+- `**publicIssuerUrl`: If not set, defaults to `nextcloud.host`. **Only used for authorization endpoints** that browsers must access. All server-to-server endpoints (token, JWKS, introspection, userinfo) use URLs from OIDC discovery without rewriting

 #### Authentication

@@ -118,6 +118,25 @@ ingress:
 | `auth.oauth.persistence.enabled` | Enable persistent storage for OAuth | `true` |
 | `auth.oauth.persistence.size` | Size of OAuth storage PVC | `100Mi` |

+#### Data Storage
+
+The `/app/data` directory is used for application data (token databases, Qdrant persistent storage, etc.). It is always mounted as writable to support the read-only root filesystem security context.
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `dataStorage.enabled` | Enable persistent storage for `/app/data` | `false` |
+| `dataStorage.size` | Size of data storage PVC | `1Gi` |
+| `dataStorage.storageClass` | Storage class (leave empty for default) | `""` |
+| `dataStorage.accessMode` | Access mode | `ReadWriteOnce` |
+| `dataStorage.existingClaim` | Use existing PVC | `""` |
+
+**When to enable persistence:**
+- Multi-user basic auth with offline access (stores `tokens.db`)
+- Qdrant persistent mode (stores vector database)
+- Any feature requiring persistent app data
+
+**When persistence is disabled:** Uses `emptyDir` (non-persistent, data lost on pod restart, but directory remains writable).
+
 #### MCP Server Configuration

 | Parameter | Description | Default |
@@ -208,16 +227,16 @@ The application exposes HTTP health check endpoints:

 #### Vector Search & Semantic Capabilities (Optional)

-Enable semantic search capabilities by deploying a vector database (Qdrant) and embedding service (Ollama or OpenAI).
+Enable semantic search capabilities with BM25 hybrid search by deploying a vector database (Qdrant) and embedding service (Ollama or OpenAI).

-**Vector Sync Configuration:**
+**Semantic Search Configuration:**

 | Parameter | Description | Default |
 |-----------|-------------|---------|
-| `vectorSync.enabled` | Enable background vector synchronization | `false` |
-| `vectorSync.scanInterval` | Scan interval in seconds | `3600` |
-| `vectorSync.processorWorkers` | Number of concurrent processor workers | `3` |
-| `vectorSync.queueMaxSize` | Maximum queue size for pending documents | `10000` |
+| `semanticSearch.enabled` | Enable semantic search and background vector synchronization | `false` |
+| `semanticSearch.scanInterval` | Scan interval in seconds | `3600` |
+| `semanticSearch.processorWorkers` | Number of concurrent processor workers | `3` |
+| `semanticSearch.queueMaxSize` | Maximum queue size for pending documents | `10000` |

 **Document Chunking Configuration:**

@@ -280,6 +299,72 @@ Use OpenAI or any OpenAI-compatible API instead of Ollama.
 | `openai.secretKey` | Key in secret containing API key | `api-key` |
 | `openai.baseUrl` | Custom API endpoint (optional) | `""` |

+#### Observability & Monitoring
+
+The chart includes comprehensive observability features including Prometheus metrics, OpenTelemetry tracing, and Grafana dashboards.
+
+**Metrics Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.metrics.enabled` | Enable Prometheus metrics | `true` |
+| `observability.metrics.port` | Metrics port | `9090` |
+| `observability.metrics.path` | Metrics endpoint path | `/metrics` |
+
+**Tracing Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.tracing.enabled` | Enable OpenTelemetry tracing | `false` |
+| `observability.tracing.endpoint` | OTLP collector endpoint | `""` |
+| `observability.tracing.serviceName` | Service name in traces | `nextcloud-mcp-server` |
+| `observability.tracing.samplingRate` | Trace sampling rate (0.0-1.0) | `1.0` |
+
+**Logging Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.logging.format` | Log format (json or text) | `json` |
+| `observability.logging.level` | Log level | `INFO` |
+| `observability.logging.includeTraceContext` | Include trace IDs in logs | `true` |
+
+**ServiceMonitor (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `serviceMonitor.enabled` | Create ServiceMonitor resource | `false` |
+| `serviceMonitor.interval` | Scrape interval | `30s` |
+| `serviceMonitor.scrapeTimeout` | Scrape timeout | `10s` |
+| `serviceMonitor.labels` | Additional labels for ServiceMonitor | `{}` |
+
+**PrometheusRule (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `prometheusRule.enabled` | Create PrometheusRule with alert rules | `false` |
+| `prometheusRule.labels` | Additional labels for PrometheusRule | `{}` |
+
+**Grafana Dashboards:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `dashboards.enabled` | Enable automatic dashboard provisioning | `false` |
+| `dashboards.grafanaFolder` | Grafana folder name for dashboards | `Nextcloud MCP` |
+| `dashboards.labels` | Additional labels for dashboard ConfigMap | `{}` |
+| `dashboards.annotations` | Additional annotations for dashboard ConfigMap | `{}` |
+
+When `dashboards.enabled` is `true`, a ConfigMap with the Grafana dashboard is created with the `grafana_dashboard: "1"` label. This enables automatic discovery by Grafana sidecar containers (commonly used with kube-prometheus-stack).
+
+The dashboard provides comprehensive monitoring including:
+- HTTP request metrics (RED pattern: Rate, Errors, Duration)
+- MCP tool performance and errors
+- Nextcloud API performance by app (notes, calendar, contacts, etc.)
+- OAuth token operations and cache hit rates
+- External dependency health (Nextcloud, Qdrant, Keycloak, Unstructured API)
+- Vector sync processing pipeline (when enabled)
+
+For manual import or more details, see `charts/nextcloud-mcp-server/dashboards/README.md`.
+
 ## Examples

 ### Example 1: Basic Auth with Ingress
@@ -361,7 +446,7 @@ nextcloud:
  host: https://cloud.example.com
  # mcpServerUrl and publicIssuerUrl are optional!
  # If not set, mcpServerUrl defaults to ingress host or localhost
-  # publicIssuerUrl defaults to nextcloud.host
+  # publicIssuerUrl defaults to nextcloud.host (only used for browser-accessible auth endpoint)

 auth:
  mode: oauth
@@ -393,7 +478,7 @@ This example shows OAuth without pre-registered credentials (using DCR) and opti
 nextcloud:
  host: https://cloud.example.com
  # mcpServerUrl will automatically use ingress host (https://mcp.example.com)
-  # publicIssuerUrl will automatically default to nextcloud.host
+  # publicIssuerUrl will automatically default to nextcloud.host (only used for browser-accessible auth endpoint)

 auth:
  mode: oauth
@@ -471,8 +556,8 @@ auth:
    username: admin
    password: secure-password

-# Enable vector sync
-vectorSync:
+# Enable semantic search
+semanticSearch:
  enabled: true
  scanInterval: 1800  # Scan every 30 minutes
  processorWorkers: 5
@@ -510,7 +595,7 @@ ollama:
 Or use an external Ollama instance:

 ```yaml
-vectorSync:
+semanticSearch:
  enabled: true

 qdrant:
@@ -526,7 +611,7 @@ ollama:
 Or use OpenAI for embeddings:

 ```yaml
-vectorSync:
+semanticSearch:
  enabled: true

 qdrant:
@@ -623,7 +708,9 @@ Readiness (returns 200 if ready, 503 if not ready):

 1. **Connection refused to Nextcloud**
   - Verify `nextcloud.host` is accessible from the Kubernetes cluster
+   - For OAuth mode: Ensure MCP server can reach OIDC discovery endpoints (token, JWKS, introspection, userinfo URLs)
   - Check network policies and firewall rules
+   - Note: Do not use internal Docker hostnames (like `http://app:80`) for `nextcloud.host` - use externally resolvable URLs

 2. **Authentication failures**
   - For basic auth: verify username/password are correct
@@ -6,14 +6,57 @@ This directory contains example Grafana dashboards for monitoring the Nextcloud

 ### nextcloud-mcp-server.json

-Comprehensive dashboard with the following panels:
+All-in-one Operations Dashboard with comprehensive monitoring across all system components.

- **Request Rate**: HTTP requests per second by method and endpoint
- **Error Rate**: Percentage of 5xx errors
- **Request Latency**: P50 and P95 latency by endpoint
- **Top MCP Tools**: Most frequently called tools
- **Nextcloud API Latency**: API call latency by app (notes, calendar, etc.)
- **Vector Sync Queue**: Queue size for background document processing
+#### Overview Row
+High-level metrics for quick health assessment:
+- **Request Rate** (stat): Total requests per second
+- **Error Rate** (stat): Percentage of 5xx errors with color thresholds
+- **P95 Latency** (stat): 95th percentile request latency
+- **Active Requests** (stat): Current in-flight requests
+
+#### HTTP Metrics (RED Pattern)
+Core request/error/duration metrics:
+- **Request Rate by Endpoint** (timeseries): RPS breakdown by endpoint
+- **Error Rate by Status Code** (timeseries): Error rates for 4xx/5xx codes
+- **Latency Percentiles** (timeseries): P50, P95, P99 latency trends
+- **Status Code Distribution** (piechart): Percentage breakdown of all status codes
+
+#### MCP Tools Row
+MCP-specific tool performance:
+- **Top Tools by Call Volume** (bargauge): Top 10 most-called tools
+- **Tool Error Rate** (timeseries): Error rates per tool
+- **Tool Execution Duration** (timeseries): P95 latency by tool
+
+#### Nextcloud API Row
+Backend API performance metrics:
+- **API Calls by App** (timeseries): Request rate per Nextcloud app (notes, calendar, contacts, etc.)
+- **API Latency by App** (timeseries): P95 latency per app
+- **API Retries by Reason** (timeseries): Retry patterns (429, timeout, connection errors)
+- **API Error Rate** (stat): Overall API error percentage
+
+#### OAuth & Authentication Row
+OAuth token operations and caching:
+- **Token Validations** (timeseries): Success/failure rates for token validation
+- **Token Exchange Operations** (timeseries): RFC 8693 token exchange operations
+- **Token Cache Hit Rate** (stat): Percentage of cache hits (color-coded: red<50%, yellow<80%, green≥80%)
+- **Refresh Token Operations** (timeseries): Refresh token storage operations by type
+
+#### Dependencies & Health Row
+External dependency status monitoring:
+- **Nextcloud Health** (stat): UP/DOWN status with color coding
+- **Qdrant Health** (stat): Vector database health status
+- **Keycloak Health** (stat): Identity provider health status
+- **Unstructured API Health** (stat): Document processing API status
+- **Health Check Duration** (timeseries): Health check latency by dependency
+- **Database Operation Latency** (timeseries): P95 latency for DB operations (SQLite, Qdrant)
+
+#### Vector Sync Row (when enabled)
+Document processing pipeline metrics:
+- **Documents Processed Rate** (timeseries): Processing throughput by status (success/failure)
+- **Processing Queue Depth** (gauge): Current queue size with thresholds (yellow>50, red>100)
+- **Qdrant Operations** (timeseries): Vector database operations by type
+- **Document Processing Duration** (timeseries): P95 processing latency

 ## Importing to Grafana

@@ -25,49 +68,77 @@ Comprehensive dashboard with the following panels:
 4. Select your Prometheus data source
 5. Click "Import"

-### Automated Import (Kubernetes)
+### Automated Import (Helm Chart)

-If using the Grafana Operator or kube-prometheus-stack, you can create a ConfigMap:
+The Helm chart now supports automatic dashboard provisioning via Grafana sidecar pattern.
+
+#### Option 1: Using Helm Chart (Recommended)
+
+Enable dashboard provisioning in your Helm values:
+
+```yaml
+# values.yaml for nextcloud-mcp-server chart
+dashboards:
+  enabled: true
+  grafanaFolder: "Nextcloud MCP"  # Folder name in Grafana
+  labels: {}  # Additional labels if needed
+```
+
+Then deploy or upgrade:

 ```bash
-kubectl create configmap nextcloud-mcp-dashboards \
+helm upgrade --install nextcloud-mcp nextcloud-mcp-server \
+  --set dashboards.enabled=true
+```
+
+The dashboard will be automatically imported by Grafana if the sidecar is configured
+to watch for ConfigMaps with label `grafana_dashboard: "1"`.
+
+#### Option 2: Using kube-prometheus-stack
+
+If using kube-prometheus-stack with Grafana sidecar enabled, the dashboard will be
+automatically discovered and imported. Ensure your Grafana deployment has:
+
+```yaml
+# kube-prometheus-stack values
+grafana:
+  sidecar:
+    dashboards:
+      enabled: true
+      label: grafana_dashboard
+      folder: /tmp/dashboards
+      provider:
+        foldersFromFilesStructure: true
+```
+
+#### Option 3: Manual ConfigMap Creation
+
+For other Grafana setups, create a ConfigMap manually:
+
+```bash
+kubectl create configmap nextcloud-mcp-dashboard \
  --from-file=nextcloud-mcp-server.json \
  -n monitoring

-# Add label for Grafana sidecar to discover
-kubectl label configmap nextcloud-mcp-dashboards \
+# Add sidecar discovery label
+kubectl label configmap nextcloud-mcp-dashboard \
  grafana_dashboard=1 \
  -n monitoring
-```

-Or add to your Helm values:
-
-```yaml
-# values.yaml for kube-prometheus-stack
-grafana:
-  dashboardProviders:
-    dashboardproviders.yaml:
-      apiVersion: 1
-      providers:
-        - name: 'nextcloud-mcp'
-          orgId: 1
-          folder: 'Nextcloud MCP'
-          type: file
-          disableDeletion: false
-          editable: true
-          options:
-            path: /var/lib/grafana/dashboards/nextcloud-mcp
-
-  dashboardsConfigMaps:
-    nextcloud-mcp: nextcloud-mcp-dashboards
+# Add folder annotation (annotations support spaces, unlike labels)
+kubectl annotate configmap nextcloud-mcp-dashboard \
+  grafana_folder="Nextcloud MCP" \
+  -n monitoring
 ```

 ## Dashboard Variables

-The dashboard includes two variables:
+The dashboard includes four template variables for dynamic filtering:

- **Data Source**: Select your Prometheus data source
- **Namespace**: Filter metrics by Kubernetes namespace
+- **datasource**: Select your Prometheus data source
+- **namespace**: Filter metrics by Kubernetes namespace (supports "All")
+- **pod**: Filter by specific pod(s) - multi-select enabled (supports "All")
+- **interval**: Query interval for rate calculations (1m, 5m, 10m, 30m, 1h - default: 5m)

 ## Customization

@@ -57,6 +57,28 @@ Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentic

   IMPORTANT: OAuth mode is experimental and requires patches to the user_oidc app.
   See: https://github.com/cbcoutinho/nextcloud-mcp-server#authentication
+{{- else if eq .Values.auth.mode "multi-user-basic" }}
+
+3. Multi-User BasicAuth Mode (Pass-Through):
+   - Users provide credentials via Authorization header
+   - Connected to: {{ .Values.nextcloud.host }}
+   {{- if .Values.auth.multiUserBasic.enableOfflineAccess }}
+   - Offline access: Enabled (background operations with app passwords)
+   - Token storage: {{ .Values.auth.multiUserBasic.tokenStorageDb }}
+   {{- else }}
+   - Offline access: Disabled (stateless pass-through)
+   {{- end }}
+{{- else if eq .Values.auth.mode "login-flow" }}
+
+3. Login Flow v2 Mode (Experimental, ADR-022):
+   - Server URL: {{ include "nextcloud-mcp-server.mcpServerUrl" . }}
+   - Connected to: {{ .Values.nextcloud.host }}
+   - Token storage: {{ .Values.auth.loginFlow.tokenStorageDb }}
+
+   Users authenticate via Nextcloud's native Login Flow v2 — no OAuth patches required.
+   Each user gets a per-device app password managed by the MCP server.
+
+   IMPORTANT: Login Flow v2 is experimental. See ADR-022 for details.
 {{- end }}

 {{- if .Values.documentProcessing.enabled }}
@@ -69,12 +91,12 @@ Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentic
   {{- end }}
 {{- end }}

-{{- if .Values.vectorSync.enabled }}
+{{- if .Values.semanticSearch.enabled }}

-5. Vector Search & Semantic Capabilities:
-   - Vector Sync: Enabled
-   - Scan Interval: {{ .Values.vectorSync.scanInterval }}s
-   - Processor Workers: {{ .Values.vectorSync.processorWorkers }}
+5. Semantic Search & Vector Capabilities:
+   - Semantic Search: Enabled
+   - Scan Interval: {{ .Values.semanticSearch.scanInterval }}s
+   - Processor Workers: {{ .Values.semanticSearch.processorWorkers }}
   {{- if .Values.qdrant.enabled }}
   - Qdrant: Deployed as subchart ({{ .Release.Name }}-qdrant:6333)
   {{- else }}
@@ -96,6 +118,85 @@ Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentic
   kubectl --namespace {{ .Release.Namespace }} exec -it deploy/{{ include "nextcloud-mcp-server.fullname" . }} -- curl -s http://localhost:{{ include "nextcloud-mcp-server.port" . }}/user/page | grep "Vector Sync"
 {{- end }}

+{{- if .Values.dashboards.enabled }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Enabled
+   - ConfigMap: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+   - Grafana Folder: {{ .Values.dashboards.grafanaFolder }}
+
+   The dashboard will be automatically imported by Grafana if the sidecar is configured
+   to watch for ConfigMaps with label "grafana_dashboard: 1".
+
+   To manually import the dashboard:
+   kubectl --namespace {{ .Release.Namespace }} get configmap {{ include "nextcloud-mcp-server.fullname" . }}-dashboard -o jsonpath='{.data.nextcloud-mcp-server\.json}' | jq . > dashboard.json
+
+   Then import dashboard.json via Grafana UI (Dashboards → Import).
+{{- else }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Disabled
+   - To enable automatic dashboard provisioning, set: dashboards.enabled=true
+
+   Manual import option:
+   The dashboard JSON is available in the chart at charts/nextcloud-mcp-server/dashboards/nextcloud-mcp-server.json
+{{- end }}
+
+{{- $legacyMultiUserBasic := eq (include "nextcloud-mcp-server.legacyMultiUserBasicPersistence" .) "true" }}
+{{- $legacyQdrant := eq (include "nextcloud-mcp-server.legacyQdrantPersistence" .) "true" }}
+{{- if or $legacyMultiUserBasic $legacyQdrant }}
+
+================================================================================
+                         DEPRECATION WARNING
+================================================================================
+
+You are using deprecated persistence configuration that will be removed in a
+future release. Your deployment will continue to work, but please migrate to
+the new unified dataStorage configuration.
+
+Deprecated settings detected:
+{{- if $legacyMultiUserBasic }}
+  - auth.multiUserBasic.persistence.* (currently enabled)
+{{- end }}
+{{- if $legacyQdrant }}
+  - qdrant.localPersistence.* (currently enabled)
+{{- end }}
+
+To migrate, update your values.yaml:
+
+  dataStorage:
+    enabled: true
+{{- if $legacyMultiUserBasic }}
+    size: {{ .Values.auth.multiUserBasic.persistence.size }}
+{{- else if $legacyQdrant }}
+    size: {{ .Values.qdrant.localPersistence.size }}
+{{- end }}
+    # storageClass: ""  # Optional: specify storage class
+    # existingClaim: "" # Optional: use existing PVC to preserve data
+
+After migrating, remove the deprecated settings:
+{{- if $legacyMultiUserBasic }}
+  - auth.multiUserBasic.persistence.enabled
+  - auth.multiUserBasic.persistence.size
+  - auth.multiUserBasic.persistence.storageClass
+  - auth.multiUserBasic.persistence.accessMode
+{{- end }}
+{{- if $legacyQdrant }}
+  - qdrant.localPersistence.enabled
+  - qdrant.localPersistence.size
+  - qdrant.localPersistence.storageClass
+  - qdrant.localPersistence.accessMode
+{{- end }}
+
+================================================================================
+{{- end }}
+
+Deployed version:
+  - Chart: {{ .Chart.Version }}
+  - App: {{ .Chart.AppVersion }}
+
+Full changelog: https://github.com/cbcoutinho/nextcloud-mcp-server/blob/master/charts/nextcloud-mcp-server/CHANGELOG.md
+
 For more information and documentation:
 - GitHub: https://github.com/cbcoutinho/nextcloud-mcp-server
 - Documentation: https://github.com/cbcoutinho/nextcloud-mcp-server#readme
@@ -72,6 +72,28 @@ Create the name of the secret to use for basic auth
 {{- end }}
 {{- end }}

+{{/*
+Create the name of the secret to use for multi-user basic auth
+*/}}
+{{- define "nextcloud-mcp-server.multiUserBasicSecretName" -}}
+{{- if .Values.auth.multiUserBasic.existingSecret }}
+{{- .Values.auth.multiUserBasic.existingSecret }}
+{{- else }}
+{{- include "nextcloud-mcp-server.fullname" . }}-multi-user-basic
+{{- end }}
+{{- end }}
+
+{{/*
+Create the name of the PVC to use for multi-user basic token storage
+*/}}
+{{- define "nextcloud-mcp-server.multiUserBasicPvcName" -}}
+{{- if .Values.auth.multiUserBasic.persistence.existingClaim }}
+{{- .Values.auth.multiUserBasic.persistence.existingClaim }}
+{{- else }}
+{{- include "nextcloud-mcp-server.fullname" . }}-token-storage
+{{- end }}
+{{- end }}
+
 {{/*
 Create the name of the secret to use for OAuth
 */}}
@@ -83,6 +105,17 @@ Create the name of the secret to use for OAuth
 {{- end }}
 {{- end }}

+{{/*
+Create the name of the secret to use for Login Flow v2
+*/}}
+{{- define "nextcloud-mcp-server.loginFlowSecretName" -}}
+{{- if .Values.auth.loginFlow.existingSecret }}
+{{- .Values.auth.loginFlow.existingSecret }}
+{{- else }}
+{{- include "nextcloud-mcp-server.fullname" . }}-login-flow
+{{- end }}
+{{- end }}
+
 {{/*
 Create the name of the PVC to use for OAuth storage
 */}}
@@ -105,6 +138,57 @@ Create the name of the PVC to use for Qdrant local persistent storage
 {{- end }}
 {{- end }}

+{{/*
+Create the name of the PVC to use for /app/data storage
+*/}}
+{{- define "nextcloud-mcp-server.dataStoragePvcName" -}}
+{{- if .Values.dataStorage.existingClaim }}
+{{- .Values.dataStorage.existingClaim }}
+{{- else }}
+{{- include "nextcloud-mcp-server.fullname" . }}-data-storage
+{{- end }}
+{{- end }}
+
+{{/*
+Determine if data storage PVC should be enabled (backward compatible)
+Checks new dataStorage.enabled OR legacy persistence configs
+*/}}
+{{- define "nextcloud-mcp-server.dataStorageEnabled" -}}
+{{- if .Values.dataStorage.enabled -}}
+true
+{{- else if and (eq .Values.auth.mode "multi-user-basic") .Values.auth.multiUserBasic.enableOfflineAccess .Values.auth.multiUserBasic.persistence.enabled -}}
+true
+{{- else if eq .Values.auth.mode "login-flow" -}}
+true
+{{- else if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled -}}
+true
+{{- else -}}
+false
+{{- end -}}
+{{- end }}
+
+{{/*
+Check if legacy multi-user-basic persistence config is being used
+*/}}
+{{- define "nextcloud-mcp-server.legacyMultiUserBasicPersistence" -}}
+{{- if and (eq .Values.auth.mode "multi-user-basic") .Values.auth.multiUserBasic.enableOfflineAccess .Values.auth.multiUserBasic.persistence.enabled (not .Values.dataStorage.enabled) -}}
+true
+{{- else -}}
+false
+{{- end -}}
+{{- end }}
+
+{{/*
+Check if legacy qdrant persistence config is being used
+*/}}
+{{- define "nextcloud-mcp-server.legacyQdrantPersistence" -}}
+{{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled (not .Values.dataStorage.enabled) -}}
+true
+{{- else -}}
+false
+{{- end -}}
+{{- end }}
+
 {{/*
 Return the MCP server port
 */}}
@@ -0,0 +1,25 @@
+{{- if .Values.dashboards.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+    {{- with .Values.dashboards.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana sidecar discovery label
+    grafana_dashboard: "1"
+  annotations:
+    {{- with .Values.dashboards.annotations }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana folder name (annotations support spaces, unlike labels)
+    {{- if .Values.dashboards.grafanaFolder }}
+    grafana_folder: {{ .Values.dashboards.grafanaFolder | quote }}
+    {{- end }}
+data:
+  nextcloud-mcp-server.json: |-
+{{ .Files.Get "dashboards/nextcloud-mcp-server.json" | indent 4 }}
+{{- end }}
@@ -46,8 +46,10 @@ spec:
          args:
            - "--transport"
            - "{{ .Values.mcp.transport }}"
-            {{- if eq .Values.auth.mode "oauth" }}
+            {{- if or (eq .Values.auth.mode "oauth") (eq .Values.auth.mode "login-flow") }}
            - "--oauth"
+            {{- end }}
+            {{- if eq .Values.auth.mode "oauth" }}
            - "--oauth-token-type"
            - "{{ .Values.auth.oauth.tokenType }}"
            {{- end }}
@@ -68,7 +70,7 @@ spec:
            - name: NEXTCLOUD_HOST
              value: {{ .Values.nextcloud.host | quote }}
            {{- if eq .Values.auth.mode "basic" }}
-            # Basic auth mode
+            # Basic auth mode (single-user)
            - name: NEXTCLOUD_USERNAME
              valueFrom:
                secretKeyRef:
@@ -79,6 +81,41 @@ spec:
                secretKeyRef:
                  name: {{ include "nextcloud-mcp-server.basicAuthSecretName" . }}
                  key: {{ .Values.auth.basic.passwordKey }}
+            {{- else if eq .Values.auth.mode "multi-user-basic" }}
+            # Multi-user BasicAuth mode (pass-through)
+            - name: ENABLE_MULTI_USER_BASIC_AUTH
+              value: "true"
+            - name: NEXTCLOUD_MCP_SERVER_URL
+              value: {{ include "nextcloud-mcp-server.mcpServerUrl" . | quote }}
+            - name: NEXTCLOUD_PUBLIC_ISSUER_URL
+              value: {{ include "nextcloud-mcp-server.publicIssuerUrl" . | quote }}
+            {{- if .Values.auth.multiUserBasic.enableOfflineAccess }}
+            # Background operations with app passwords (replaces deprecated ENABLE_OFFLINE_ACCESS)
+            - name: ENABLE_BACKGROUND_OPERATIONS
+              value: "true"
+            - name: TOKEN_STORAGE_DB
+              value: {{ .Values.auth.multiUserBasic.tokenStorageDb | quote }}
+            - name: TOKEN_ENCRYPTION_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: {{ include "nextcloud-mcp-server.multiUserBasicSecretName" . }}
+                  key: {{ .Values.auth.multiUserBasic.tokenEncryptionKeyKey }}
+            - name: NEXTCLOUD_OIDC_SCOPES
+              value: {{ .Values.auth.multiUserBasic.scopes | quote }}
+            {{- if or .Values.auth.multiUserBasic.clientId .Values.auth.multiUserBasic.existingSecret }}
+            # Static OAuth credentials (optional - uses DCR if not provided)
+            - name: NEXTCLOUD_OIDC_CLIENT_ID
+              valueFrom:
+                secretKeyRef:
+                  name: {{ include "nextcloud-mcp-server.multiUserBasicSecretName" . }}
+                  key: {{ .Values.auth.multiUserBasic.clientIdKey }}
+            - name: NEXTCLOUD_OIDC_CLIENT_SECRET
+              valueFrom:
+                secretKeyRef:
+                  name: {{ include "nextcloud-mcp-server.multiUserBasicSecretName" . }}
+                  key: {{ .Values.auth.multiUserBasic.clientSecretKey }}
+            {{- end }}
+            {{- end }}
            {{- else if eq .Values.auth.mode "oauth" }}
            # OAuth mode
            - name: NEXTCLOUD_MCP_SERVER_URL
@@ -87,7 +124,7 @@ spec:
              value: {{ include "nextcloud-mcp-server.publicIssuerUrl" . | quote }}
            - name: NEXTCLOUD_OIDC_SCOPES
              value: {{ .Values.auth.oauth.scopes | quote }}
-            {{- if .Values.auth.oauth.clientId }}
+            {{- if or .Values.auth.oauth.clientId .Values.auth.oauth.existingSecret }}
            - name: NEXTCLOUD_OIDC_CLIENT_ID
              valueFrom:
                secretKeyRef:
@@ -99,6 +136,21 @@ spec:
                  name: {{ include "nextcloud-mcp-server.oauthSecretName" . }}
                  key: {{ .Values.auth.oauth.clientSecretKey }}
            {{- end }}
+            {{- else if eq .Values.auth.mode "login-flow" }}
+            # Login Flow v2 mode (ADR-022)
+            - name: ENABLE_LOGIN_FLOW
+              value: "true"
+            - name: NEXTCLOUD_MCP_SERVER_URL
+              value: {{ include "nextcloud-mcp-server.mcpServerUrl" . | quote }}
+            - name: NEXTCLOUD_PUBLIC_ISSUER_URL
+              value: {{ include "nextcloud-mcp-server.publicIssuerUrl" . | quote }}
+            - name: TOKEN_STORAGE_DB
+              value: {{ .Values.auth.loginFlow.tokenStorageDb | quote }}
+            - name: TOKEN_ENCRYPTION_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: {{ include "nextcloud-mcp-server.loginFlowSecretName" . }}
+                  key: {{ .Values.auth.loginFlow.tokenEncryptionKeyKey }}
            {{- end }}
            {{- if .Values.documentProcessing.enabled }}
            # Document processing
@@ -147,16 +199,16 @@ spec:
              value: {{ .Values.documentProcessing.custom.types | quote }}
            {{- end }}
            {{- end }}
-            # Vector Sync
-            - name: VECTOR_SYNC_ENABLED
-              value: {{ .Values.vectorSync.enabled | quote }}
-            {{- if .Values.vectorSync.enabled }}
+            # Semantic Search (replaces deprecated VECTOR_SYNC_ENABLED)
+            - name: ENABLE_SEMANTIC_SEARCH
+              value: {{ .Values.semanticSearch.enabled | quote }}
+            {{- if .Values.semanticSearch.enabled }}
            - name: VECTOR_SYNC_SCAN_INTERVAL
-              value: {{ .Values.vectorSync.scanInterval | quote }}
+              value: {{ .Values.semanticSearch.scanInterval | quote }}
            - name: VECTOR_SYNC_PROCESSOR_WORKERS
-              value: {{ .Values.vectorSync.processorWorkers | quote }}
+              value: {{ .Values.semanticSearch.processorWorkers | quote }}
            - name: VECTOR_SYNC_QUEUE_MAX_SIZE
-              value: {{ .Values.vectorSync.queueMaxSize | quote }}
+              value: {{ .Values.semanticSearch.queueMaxSize | quote }}
            {{- end }}
            # Document Chunking (always set, used by vector sync processor)
            - name: DOCUMENT_CHUNK_SIZE
@@ -247,29 +299,29 @@ spec:
          volumeMounts:
            - name: tmp
              mountPath: /tmp
-            {{- if and (eq .Values.auth.mode "oauth") .Values.auth.oauth.persistence.enabled }}
+            {{- if or (and (eq .Values.auth.mode "oauth") .Values.auth.oauth.persistence.enabled) (eq .Values.auth.mode "login-flow") }}
            - name: oauth-storage
              mountPath: /app/.oauth
            {{- end }}
-            {{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled }}
-            - name: qdrant-data
+            - name: data-storage
              mountPath: /app/data
-            {{- end }}
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
      volumes:
        - name: tmp
          emptyDir: {}
-        {{- if and (eq .Values.auth.mode "oauth") .Values.auth.oauth.persistence.enabled }}
+        {{- if or (and (eq .Values.auth.mode "oauth") .Values.auth.oauth.persistence.enabled) (eq .Values.auth.mode "login-flow") }}
        - name: oauth-storage
          persistentVolumeClaim:
            claimName: {{ include "nextcloud-mcp-server.oauthPvcName" . }}
        {{- end }}
-        {{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled }}
-        - name: qdrant-data
+        - name: data-storage
+        {{- if eq (include "nextcloud-mcp-server.dataStorageEnabled" .) "true" }}
          persistentVolumeClaim:
-            claimName: {{ include "nextcloud-mcp-server.qdrantPvcName" . }}
+            claimName: {{ include "nextcloud-mcp-server.dataStoragePvcName" . }}
+        {{- else }}
+          emptyDir: {}
        {{- end }}
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
@@ -16,20 +16,49 @@ spec:
      storage: {{ .Values.auth.oauth.persistence.size }}
 {{- end }}
 ---
-{{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled (not .Values.qdrant.localPersistence.existingClaim) }}
+{{- if eq .Values.auth.mode "login-flow" }}
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
-  name: {{ include "nextcloud-mcp-server.fullname" . }}-qdrant-data
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-oauth-storage
  labels:
    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
 spec:
  accessModes:
-    - {{ .Values.qdrant.localPersistence.accessMode }}
-  {{- if .Values.qdrant.localPersistence.storageClass }}
-  storageClassName: {{ .Values.qdrant.localPersistence.storageClass }}
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 100Mi
+{{- end }}
+---
+{{- if and (eq (include "nextcloud-mcp-server.dataStorageEnabled" .) "true") (not .Values.dataStorage.existingClaim) }}
+{{- $legacyMultiUserBasic := eq (include "nextcloud-mcp-server.legacyMultiUserBasicPersistence" .) "true" }}
+{{- $legacyQdrant := eq (include "nextcloud-mcp-server.legacyQdrantPersistence" .) "true" }}
+{{- $accessMode := .Values.dataStorage.accessMode }}
+{{- $storageClass := .Values.dataStorage.storageClass }}
+{{- $size := .Values.dataStorage.size }}
+{{- if $legacyMultiUserBasic }}
+{{- $accessMode = .Values.auth.multiUserBasic.persistence.accessMode }}
+{{- $storageClass = .Values.auth.multiUserBasic.persistence.storageClass }}
+{{- $size = .Values.auth.multiUserBasic.persistence.size }}
+{{- else if $legacyQdrant }}
+{{- $accessMode = .Values.qdrant.localPersistence.accessMode }}
+{{- $storageClass = .Values.qdrant.localPersistence.storageClass }}
+{{- $size = .Values.qdrant.localPersistence.size }}
+{{- end }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-data-storage
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+spec:
+  accessModes:
+    - {{ $accessMode }}
+  {{- if $storageClass }}
+  storageClassName: {{ $storageClass }}
  {{- end }}
  resources:
    requests:
-      storage: {{ .Values.qdrant.localPersistence.size }}
+      storage: {{ $size }}
 {{- end }}
@@ -13,6 +13,24 @@ data:
 {{- end }}
 {{- end }}
 ---
+{{- if eq .Values.auth.mode "multi-user-basic" }}
+{{- if and .Values.auth.multiUserBasic.enableOfflineAccess (not .Values.auth.multiUserBasic.existingSecret) }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-multi-user-basic
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+type: Opaque
+data:
+  {{ .Values.auth.multiUserBasic.tokenEncryptionKeyKey }}: {{ .Values.auth.multiUserBasic.tokenEncryptionKey | b64enc | quote }}
+  {{- if .Values.auth.multiUserBasic.clientId }}
+  {{ .Values.auth.multiUserBasic.clientIdKey }}: {{ .Values.auth.multiUserBasic.clientId | b64enc | quote }}
+  {{ .Values.auth.multiUserBasic.clientSecretKey }}: {{ .Values.auth.multiUserBasic.clientSecret | b64enc | quote }}
+  {{- end }}
+{{- end }}
+{{- end }}
+---
 {{- if eq .Values.auth.mode "oauth" }}
 {{- if and .Values.auth.oauth.clientId (not .Values.auth.oauth.existingSecret) }}
 apiVersion: v1
@@ -27,3 +45,17 @@ data:
  {{ .Values.auth.oauth.clientSecretKey }}: {{ .Values.auth.oauth.clientSecret | b64enc | quote }}
 {{- end }}
 {{- end }}
+---
+{{- if eq .Values.auth.mode "login-flow" }}
+{{- if not .Values.auth.loginFlow.existingSecret }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-login-flow
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+type: Opaque
+data:
+  {{ .Values.auth.loginFlow.tokenEncryptionKeyKey }}: {{ .Values.auth.loginFlow.tokenEncryptionKey | b64enc | quote }}
+{{- end }}
+{{- end }}
@@ -26,21 +26,30 @@ nextcloud:
  # Example: https://mcp.example.com
  mcpServerUrl: ""

-  # Public issuer URL for OAuth (OAuth mode only)
-  # If not specified, defaults to nextcloud.host
-  # Only set this if your Nextcloud is accessible at a different URL for OAuth
+  # Public issuer URL for browser-accessible OAuth authorization endpoints (OAuth mode only)
+  # ONLY used to make authorization endpoints accessible to users' browsers
+  # All server-to-server communication (token endpoint, JWKS, introspection, userinfo)
+  # uses URLs from OIDC discovery without any rewriting
+  #
+  # Use case: When MCP server accesses Nextcloud at one URL but browsers need a different
+  # public URL for OAuth login (e.g., server uses internal DNS, browsers use public domain)
+  #
+  # If not specified, defaults to nextcloud.host (works when MCP server and browsers
+  # both access Nextcloud at the same URL)
  # Example: https://cloud.example.com
  publicIssuerUrl: ""

 # Authentication configuration
-# Choose either basic auth OR oauth (not both)
+# Choose one mode: "basic", "multi-user-basic", "oauth", or "login-flow"
 auth:
-  # Authentication mode: "basic" or "oauth"
-  # basic: Uses username/password (recommended for most users)
+  # Authentication mode: "basic", "multi-user-basic", "oauth", or "login-flow"
+  # basic: Single-user with username/password (recommended for personal use)
+  # multi-user-basic: Multi-user with BasicAuth pass-through (credentials in request headers)
  # oauth: Uses OAuth2/OIDC (experimental, requires patches)
+  # login-flow: Multi-user via Nextcloud Login Flow v2 (experimental, ADR-022)
  mode: basic

-  # Basic authentication settings
+  # Basic authentication settings (single-user mode)
  basic:
    # Nextcloud username (ignored if existingSecret is set)
    username: ""
@@ -58,6 +67,47 @@ auth:
    usernameKey: "username"
    passwordKey: "password"

+  # Multi-user BasicAuth settings (pass-through mode)
+  # Users provide credentials in request headers (Authorization: Basic ...)
+  # Server optionally stores app passwords for background operations
+  multiUserBasic:
+    # Enable offline access (background operations using app passwords via Astrolabe)
+    # When enabled, requires token encryption key. OAuth client credentials are optional (uses DCR if not provided)
+    enableOfflineAccess: false
+    # Token encryption key (required if enableOfflineAccess: true, ignored if existingSecret is set)
+    # Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+    tokenEncryptionKey: ""
+    # Token storage database path
+    tokenStorageDb: "/app/data/tokens.db"
+    # OAuth client credentials (optional - uses Dynamic Client Registration if not provided)
+    # Only needed if enableOfflineAccess: true
+    clientId: ""
+    clientSecret: ""
+    # OAuth scopes to request (space-separated)
+    scopes: "openid profile email offline_access notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write todo:read todo:write"
+    # Use existing secret for multi-user basic auth credentials
+    # If set, tokenEncryptionKey, clientId, and clientSecret above are ignored
+    # Secret should contain keys specified in the *Key fields below
+    # Example:
+    #   kubectl create secret generic my-multiuser-creds \
+    #     --from-literal=token_encryption_key=ESF1BvEQ... \
+    #     --from-literal=client_id=my-client-id \
+    #     --from-literal=client_secret=my-client-secret
+    existingSecret: ""
+    # Keys in the existing secret
+    tokenEncryptionKeyKey: "token_encryption_key"
+    clientIdKey: "client_id"
+    clientSecretKey: "client_secret"
+    # Persistent storage for token database
+    persistence:
+      enabled: true
+      # Storage class (leave empty for default)
+      storageClass: ""
+      accessMode: ReadWriteOnce
+      size: 100Mi
+      # Use existing PVC
+      existingClaim: ""
+
  # OAuth2/OIDC settings (experimental)
  oauth:
    # OAuth token type: "jwt" or "opaque"
@@ -90,6 +140,43 @@ auth:
      # Use existing PVC
      existingClaim: ""

+  # Login Flow v2 settings (experimental, ADR-022)
+  # Uses Nextcloud's native Login Flow v2 to obtain app passwords per user.
+  # No OAuth patches required — works with stock Nextcloud.
+  # See: docs/ADR-022-deployment-mode-consolidation.md
+  loginFlow:
+    # Token encryption key (required, ignored if existingSecret is set)
+    # Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+    tokenEncryptionKey: ""
+    # Token storage database path
+    tokenStorageDb: "/app/data/tokens.db"
+    # Use existing secret instead of creating one
+    existingSecret: ""
+    # Key in the existing secret
+    tokenEncryptionKeyKey: "token_encryption_key"
+
+# Data Storage Configuration
+# Persistent volume for /app/data directory
+# Used for: token databases, qdrant persistent storage, and any app data
+# When disabled, uses emptyDir (non-persistent, but still writable)
+dataStorage:
+  # Enable persistent storage for /app/data
+  # Set to true when using:
+  # - Multi-user basic auth with offline access (stores tokens.db)
+  # - Login flow mode (stores app passwords in tokens.db)
+  # - Qdrant persistent mode (stores vector database)
+  # - Any feature requiring persistent app data
+  # Set to false for basic auth without persistence (uses emptyDir)
+  enabled: false
+  # Storage class (leave empty for default)
+  storageClass: ""
+  accessMode: ReadWriteOnce
+  # Size for data storage (should accommodate tokens.db and/or qdrant data)
+  # Recommended: 1Gi minimum, 5Gi for production with qdrant
+  size: 1Gi
+  # Use existing PVC
+  existingClaim: ""
+
 # MCP server configuration
 mcp:
  # Transport mode (default: streamable-http for SSE)
@@ -205,6 +292,20 @@ prometheusRule:
  # Additional labels for PrometheusRule (e.g., for Prometheus selector)
  # Example: { prometheus: kube-prometheus }

+# Grafana dashboards (requires Grafana with sidecar enabled)
+dashboards:
+  # Enable automatic dashboard provisioning via ConfigMap
+  enabled: false
+  # Grafana folder name where dashboards will be imported
+  # The grafana-sidecar looks for ConfigMaps with label "grafana_dashboard: 1"
+  # and reads the folder name from annotation "grafana_folder" (supports spaces)
+  grafanaFolder: "Nextcloud MCP"
+  # Additional labels for dashboard ConfigMap
+  # These will be added alongside the required "grafana_dashboard: 1" label
+  labels: {}
+  # Additional annotations for dashboard ConfigMap
+  annotations: {}
+
 service:
  type: ClusterIP
  port: 8000
@@ -302,10 +403,11 @@ extraEnvFrom: []
 # - secretRef:
 #     name: my-secret

-# Vector Sync Configuration
-# Background synchronization of Nextcloud content into vector database for semantic search
-vectorSync:
-  # Enable background vector synchronization
+# Semantic Search Configuration
+# Enable semantic search with BM25 hybrid search and background synchronization
+# of Nextcloud content into vector database
+semanticSearch:
+  # Enable semantic search and background vector synchronization
  enabled: false
  # Scan interval in seconds (how often to check for changes)
  scanInterval: 3600
@@ -316,7 +418,7 @@ vectorSync:

 # Document Chunking Configuration
 # Controls how documents are split into chunks before embedding
-# Only relevant when vectorSync.enabled is true
+# Only relevant when semanticSearch.enabled is true
 documentChunking:
  # Number of words per chunk (default: 512)
  # Smaller chunks (256-384): Better for precise searches, more chunks to store
@@ -0,0 +1,25 @@
+# CI-specific overrides for RAG evaluation pipeline
+# This file is used by the rag-evaluation.yml workflow to configure the MCP
+# container with OpenAI/GitHub Models API for vector embeddings.
+#
+# Usage:
+#   docker compose -f docker-compose.yml -f docker-compose.ci.yml up
+#
+# Environment variables (set in CI workflow):
+#   OPENAI_API_KEY - API key for embeddings (GitHub Models uses GITHUB_TOKEN)
+#   OPENAI_BASE_URL - API endpoint (e.g., https://models.github.ai/inference)
+#   OPENAI_EMBEDDING_MODEL - Model name (e.g., openai/text-embedding-3-small)
+#   OPENAI_GENERATION_MODEL - Model name for generation (e.g., openai/gpt-4o-mini)
+
+services:
+  mcp:
+    environment:
+      # OpenAI provider configuration (required for CI vector sync)
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - OPENAI_BASE_URL=${OPENAI_BASE_URL:-https://models.github.ai/inference}
+      - OPENAI_EMBEDDING_MODEL=${OPENAI_EMBEDDING_MODEL:-openai/text-embedding-3-small}
+      - OPENAI_GENERATION_MODEL=${OPENAI_GENERATION_MODEL:-openai/gpt-4o-mini}
+      # Faster sync for CI
+      - VECTOR_SYNC_SCAN_INTERVAL=${VECTOR_SYNC_SCAN_INTERVAL:-5}
+      # Enable document processing for PDF parsing
+      - ENABLE_DOCUMENT_PROCESSING=true
@@ -3,11 +3,13 @@ services:
  # https://hub.docker.com/_/mariadb
  db:
    # Note: Check the recommend version here: https://docs.nextcloud.com/server/latest/admin_manual/installation/system_requirements.html#server
-    image: docker.io/library/mariadb:lts@sha256:ae6119716edac6998ae85508431b3d2e666530ddf4e94c61a10710caec9b0f71
+    image: docker.io/library/mariadb:lts@sha256:8164f184d16c30e2f159e30518113667b796306dff0fe558876ab1ff521a682f
    restart: always
    command: --transaction-isolation=READ-COMMITTED
    volumes:
      - db:/var/lib/mysql
+    ports:
+      - 127.0.0.1:3306:3306
    environment:
      - MYSQL_ROOT_PASSWORD=password
      - MYSQL_PASSWORD=password
@@ -17,24 +19,25 @@ services:
  # Note: Redis is an external service. You can find more information about the configuration here:
  # https://hub.docker.com/_/redis
  redis:
-    image: docker.io/library/redis:alpine@sha256:28c9c4d7596949a24b183eaaab6455f8e5d55ecbf72d02ff5e2c17fe72671d31
+    image: docker.io/library/redis:alpine@sha256:2afba59292f25f5d1af200496db41bea2c6c816b059f57ae74703a50a03a27d0
    restart: always

  app:
-    image: docker.io/library/nextcloud:32.0.1@sha256:5b043f7ea2f609d5ff5635f475c30d303bec17775a5c3f7fa435e3818e669120
+    image: ${NEXTCLOUD_IMAGE:-docker.io/library/nextcloud:32.0.6@sha256:5c4e09f72f096cd68379a8ae69f71e61d13da5a07430fc4a17c702a14e6a4267}
    restart: always
    ports:
-      - 0.0.0.0:8080:80
+      - 127.0.0.1:8080:80
    depends_on:
      - redis
      - db
-      - keycloak
    volumes:
      - nextcloud:/var/www/html
      - ./app-hooks:/docker-entrypoint-hooks.d:ro
      # Mount OIDC development directory outside /var/www/html to avoid rsync conflicts
      # The post-installation hook will register /opt/apps as an additional app directory
-      - ./third_party:/opt/apps:ro
+      #- ./third_party:/opt/apps:ro
+      - ./third_party/astrolabe:/opt/apps/astrolabe:ro
+      #- ./third_party/oidc:/opt/apps/oidc:ro  # Use app store version; dev mount lacks vendor/
    environment:
      - NEXTCLOUD_TRUSTED_DOMAINS=app
      - NEXTCLOUD_ADMIN_USER=admin
@@ -44,6 +47,7 @@ services:
      - MYSQL_USER=nextcloud
      - MYSQL_HOST=db
      - REDIS_HOST=redis
+      - MCP_SERVER_URL=${MCP_SERVER_URL:-}
    healthcheck:
      test: ["CMD-SHELL", "curl -Ss http://localhost/status.php | grep '\"installed\":true' || exit 1"]
      interval: 10s
@@ -51,14 +55,14 @@ services:
      retries: 30

  recipes:
-    image: docker.io/library/nginx:alpine@sha256:b3c656d55d7ad751196f21b7fd2e8d4da9cb430e32f646adcf92441b72f82b14
+    image: docker.io/library/nginx:alpine@sha256:5878d06ae4c83d73285438255f705bb3f9a736f41cd24876ed25bb33faf76c7d
    restart: always
    volumes:
      - ./tests/fixtures/test_recipe.html:/usr/share/nginx/html/test_recipe.html:ro
      - ./tests/fixtures/nginx.conf:/etc/nginx/nginx.conf:ro

  unstructured:
-    image: downloads.unstructured.io/unstructured-io/unstructured-api:latest@sha256:54282d3a25f33fd6cf69bc45b3d37770f213593f58b6dfe5e85fe546376b2807
+    image: downloads.unstructured.io/unstructured-io/unstructured-api:latest@sha256:ba6cb073af079c498e9466a5a9152ba4b6c9cad12efeeaf053ba383023d5db08
    restart: always
    ports:
      - 127.0.0.1:8002:8000
@@ -69,23 +73,25 @@ services:

  mcp:
    build: .
-    command: ["--transport", "streamable-http"]
    restart: always
+    command: ["--transport", "streamable-http"]
    depends_on:
      app:
        condition: service_healthy
    ports:
      - 127.0.0.1:8000:8000
+      - 127.0.0.1:9090:9090
    volumes:
      - mcp-data:/app/data
    environment:
      - NEXTCLOUD_HOST=http://app:80
      - NEXTCLOUD_USERNAME=admin
      - NEXTCLOUD_PASSWORD=admin
+      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080

-      # Vector sync configuration (ADR-007)
-      - VECTOR_SYNC_ENABLED=true
-      - VECTOR_SYNC_SCAN_INTERVAL=10
+      # Semantic search configuration (ADR-007, ADR-021)
+      #- ENABLE_SEMANTIC_SEARCH=true
+      - VECTOR_SYNC_SCAN_INTERVAL=60
      - VECTOR_SYNC_PROCESSOR_WORKERS=1

      #- LOG_FORMAT=json
@@ -119,6 +125,46 @@ services:
      # Tune these based on your embedding model and content type
      # - DOCUMENT_CHUNK_SIZE=512      # Words per chunk (default: 512)
      # - DOCUMENT_CHUNK_OVERLAP=50    # Overlapping words (default: 50, recommended: 10-20% of chunk size)
+    profiles:
+      - single-user
+
+  mcp-multi-user-basic:
+    build: .
+    restart: always
+    command: ["--transport", "streamable-http"]
+    depends_on:
+      app:
+        condition: service_healthy
+    ports:
+      - 127.0.0.1:8003:8000
+    environment:
+      # Multi-user BasicAuth pass-through mode (ADR-020)
+      - NEXTCLOUD_HOST=http://app:80
+      - NEXTCLOUD_MCP_SERVER_URL=http://localhost:8003
+      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080
+      - ENABLE_MULTI_USER_BASIC_AUTH=true
+      - ENABLE_BACKGROUND_OPERATIONS=true
+
+      # Token storage (required for middleware initialization)
+      # DEVELOPMENT ONLY - generate a fresh key for production:
+      # python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+      - TOKEN_ENCRYPTION_KEY=fqqI4G51yBCOcu9cvv6wCUJB7sf_CK2za5ClC6b86yY=
+      - TOKEN_STORAGE_DB=/app/data/tokens.db
+
+      - ENABLE_SEMANTIC_SEARCH=true
+      - VECTOR_SYNC_SCAN_INTERVAL=60
+      - VECTOR_SYNC_PROCESSOR_WORKERS=1
+
+      # OAuth credentials for background sync (optional - uses DCR if not provided)
+      # Uncomment to avoid DCR:
+      # - NEXTCLOUD_OIDC_CLIENT_ID=your_client_id
+      # - NEXTCLOUD_OIDC_CLIENT_SECRET=your_client_secret
+
+      # NO admin credentials - credentials come from client Authorization header
+    volumes:
+      - multi-user-basic-data:/app/data
+    profiles:
+      - multi-user-basic

  mcp-oauth:
    build: .
@@ -140,23 +186,38 @@ services:
      - NEXTCLOUD_OIDC_SCOPES=openid profile email notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write todo:read todo:write

      # Refresh token storage (ADR-002 Tier 1)
-      - ENABLE_OFFLINE_ACCESS=true
-      - TOKEN_ENCRYPTION_KEY=ESF1BvEQdGYsCluwMx9Cxvw3uh5pFowPH7Rg_nIliyo=
+      - ENABLE_BACKGROUND_OPERATIONS=true
+      - TOKEN_ENCRYPTION_KEY=Qh60VwZQsM7CLtSMunzC0gIGPBT948S6VSawUkODtvU=
      - TOKEN_STORAGE_DB=/app/data/tokens.db

      # ADR-005: Multi-audience mode (default - ENABLE_TOKEN_EXCHANGE=false)
      # Tokens must contain BOTH MCP and Nextcloud audiences
      # No token exchange needed - tokens work for both MCP auth and Nextcloud APIs

+      # Semantic search configuration (ADR-007, ADR-021)
+      - ENABLE_SEMANTIC_SEARCH=true
+      - VECTOR_SYNC_SCAN_INTERVAL=60
+      - VECTOR_SYNC_PROCESSOR_WORKERS=1
+
+      # Qdrant configuration - persistent local storage
+      - QDRANT_LOCATION=/app/data/qdrant
+
+      # Embedding provider for vector sync (use Simple provider as fallback)
+      # Ollama not available in CI/test environments
+      # - OLLAMA_BASE_URL=http://ollama:11434
+      # - OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
      # NO admin credentials - using OAuth with Dynamic Client Registration (DCR)
      # Client credentials registered via RFC 7591 and stored in volume
      # JWT token type is used for testing (faster validation, scopes embedded in token)
    volumes:
      - oauth-client-storage:/app/.oauth
      - oauth-tokens:/app/data
+    profiles:
+      - oauth

  keycloak:
-    image: quay.io/keycloak/keycloak:26.4.4@sha256:c6459d5fae1b759f5d667ebdc6237ab3121379c3494e213898569014ede1846d
+    image: quay.io/keycloak/keycloak:26.5.4@sha256:ae8efb0d218d8921334b03a2dbee7069a0b868240691c50a3ffc9f42fabba8b4
    command:
      - "start-dev"
      - "--import-realm"
@@ -176,6 +237,8 @@ services:
      interval: 10s
      timeout: 5s
      retries: 30
+    profiles:
+      - keycloak

  mcp-keycloak:
    build: .
@@ -193,8 +256,8 @@ services:
      # Provider auto-detected from OIDC_DISCOVERY_URL issuer
      # Using internal Docker hostname for discovery to get consistent issuer
      - OIDC_DISCOVERY_URL=http://keycloak:8080/realms/nextcloud-mcp/.well-known/openid-configuration
-      - OIDC_CLIENT_ID=nextcloud-mcp-server
-      - OIDC_CLIENT_SECRET=mcp-secret-change-in-production
+      - NEXTCLOUD_OIDC_CLIENT_ID=nextcloud-mcp-server
+      - NEXTCLOUD_OIDC_CLIENT_SECRET=mcp-secret-change-in-production
      - OIDC_JWKS_URI=http://keycloak:8080/realms/nextcloud-mcp/protocol/openid-connect/certs

      # Nextcloud API endpoint (for accessing APIs with validated token)
@@ -204,7 +267,7 @@ services:
      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8888/realms/nextcloud-mcp

      # Refresh token storage (ADR-002 Tier 1 & 2)
-      - ENABLE_OFFLINE_ACCESS=true
+      - ENABLE_BACKGROUND_OPERATIONS=true
      - TOKEN_ENCRYPTION_KEY=ESF1BvEQdGYsCluwMx9Cxvw3uh5pFowPH7Rg_nIliyo=
      - TOKEN_STORAGE_DB=/app/data/tokens.db

@@ -221,9 +284,68 @@ services:
    volumes:
      - keycloak-tokens:/app/data
      - keycloak-oauth-storage:/app/.oauth
+    profiles:
+      - keycloak
+
+  # Login Flow v2 mode (ADR-022)
+  # Test with: docker compose --profile login-flow up --build -d
+  mcp-login-flow:
+    build: .
+    restart: always
+    # --oauth enables the OAuth/OIDC identity layer that Login Flow v2 builds on
+    # (user identity via OAuth session, Nextcloud access via app passwords)
+    command: ["--transport", "streamable-http", "--oauth", "--port", "8004"]
+    depends_on:
+      app:
+        condition: service_healthy
+    ports:
+      - 127.0.0.1:8004:8004
+    environment:
+      - NEXTCLOUD_HOST=http://app:80
+      - NEXTCLOUD_MCP_SERVER_URL=http://localhost:8004
+      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080
+
+      # Login Flow v2 (ADR-022)
+      - ENABLE_LOGIN_FLOW=true
+
+      # Token storage (required for app password + session persistence)
+      # DEVELOPMENT ONLY - generate a fresh key for production:
+      # python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+      - TOKEN_ENCRYPTION_KEY=rxJvkBf7ZBjZZDL4a1sSqjhmjawhmbRMSOGfK8HDyKU=
+      - TOKEN_STORAGE_DB=/app/data/tokens.db
+
+      # Semantic search
+      - ENABLE_SEMANTIC_SEARCH=true
+      - VECTOR_SYNC_SCAN_INTERVAL=60
+      - VECTOR_SYNC_PROCESSOR_WORKERS=1
+    volumes:
+      - login-flow-data:/app/data
+      - login-flow-oauth-storage:/app/.oauth
+    profiles:
+      - login-flow
+
+  # Smithery stateless deployment mode (ADR-016)
+  # Test with: docker compose --profile smithery up smithery
+  # Then: curl http://localhost:8081/.well-known/mcp-config
+  smithery:
+    build:
+      context: .
+      dockerfile: Dockerfile.smithery
+    restart: always
+    depends_on:
+      app:
+        condition: service_healthy
+    ports:
+      - 127.0.0.1:8081:8081
+    environment:
+      - SMITHERY_DEPLOYMENT=true
+      - ENABLE_SEMANTIC_SEARCH=false
+      - PORT=8081
+    profiles:
+      - smithery

  qdrant:
-    image: qdrant/qdrant:v1.15.5@sha256:0fb8897412abc81d1c0430a899b9a81eb8328aa634e7242d1bc804c1fe8fe863
+    image: docker.io/qdrant/qdrant:v1.17.0@sha256:f1c7272cdac52b38c1a0e89313922d940ba50afd90d593a1605dbbc214e66ffb
    restart: always
    ports:
      - 127.0.0.1:6333:6333  # REST API
@@ -247,5 +369,8 @@ volumes:
  oauth-tokens:
  keycloak-tokens:
  keycloak-oauth-storage:
+  login-flow-data:
+  login-flow-oauth-storage:
  qdrant-data:
  mcp-data:
+  multi-user-basic-data:
@@ -0,0 +1,943 @@
+# ADR-011: Improving Semantic Search Quality Through Better Chunking and Embeddings
+
+**Status**: Partially Implemented (Chunking Complete, Embeddings Pending)
+**Date**: 2025-11-12
+**Implementation Date**: 2025-11-18 (Chunking)
+**Authors**: Development Team
+**Related**: ADR-003 (Vector Database Architecture), ADR-008 (MCP Sampling for RAG)
+
+## Context
+
+The semantic search implementation provides document retrieval across Nextcloud apps using vector embeddings. Production usage has revealed that **the system frequently misses relevant documents** (recall problem).
+
+Root cause analysis identifies two fundamental issues:
+
+### 1. Poor Chunking Strategy
+
+**Current Implementation** (`nextcloud_mcp_server/vector/document_chunker.py:36`):
+```python
+words = content.split()  # Naive whitespace splitting
+chunk_size = 512  # words
+overlap = 50  # words
+chunks = [words[i:i+chunk_size] for i in range(0, len(words), chunk_size-overlap)]
+```
+
+**Problems**:
+- **Breaks semantic boundaries**: Splits mid-sentence, mid-paragraph, mid-thought
+- **Loses context**: "The meeting discussed budget. We decided to..." becomes two disconnected chunks
+- **Poor retrieval**: Relevant content split across chunks with low individual relevance scores
+- **No structure awareness**: Ignores markdown headers, lists, code blocks
+
+**Evidence**:
+- Documents with relevant content in middle sections score poorly (content split across 3+ chunks)
+- Multi-sentence concepts (spanning 60-100 words) are fragmented
+- Search for "budget planning process" misses documents where these words appear in adjacent sentences but different chunks
+
+### 2. Suboptimal Embedding Model
+
+**Current Implementation** (`nextcloud_mcp_server/embedding/ollama_provider.py:33`):
+```python
+_model = "nomic-embed-text"  # 768 dimensions
+_dimension = 768  # Hardcoded
+```
+
+**Problems**:
+- **Model selection**: `nomic-embed-text` is general-purpose, not optimized for our use case
+- **No benchmarking**: Selected without comparative evaluation
+- **Dimensionality**: 768-dim may be insufficient for nuanced semantic distinctions
+- **No domain adaptation**: Model not tuned for Nextcloud content (notes, calendar, deck cards)
+
+**Evidence**:
+- Synonymous queries return different results ("meeting notes" vs. "discussion summary")
+- Domain-specific terms poorly represented ("standup", "retrospective", "OKRs")
+- Cross-lingual content (if present) not well supported
+
+### Current Performance
+
+**Baseline Metrics** (100-document test corpus, 50 queries):
+- **Recall@10**: ~52% (misses 48% of relevant documents)
+- **Precision@10**: ~78% (acceptable but room for improvement)
+- **MRR**: 0.58 (relevant docs often not in top positions)
+- **Zero-result queries**: 18% (completely missing relevant content)
+
+## Decision Drivers
+
+1. **Address Root Causes**: Fix fundamental issues (chunking, embeddings) before adding complexity (reranking, hybrid search)
+2. **Measurable Impact**: Target 40-60% improvement in recall through chunking/embedding alone
+3. **Independence**: Improvements should be orthogonal to future enhancements (reranking, GraphRAG)
+4. **Cost Efficiency**: Minimize infrastructure and API costs
+5. **Reindexing Acceptable**: One-time reindex cost justified by long-term quality improvement
+
+## Options Considered
+
+### Chunking Strategies
+
+#### Option C1: Semantic Sentence-Aware Chunking (RECOMMENDED)
+
+**Description**: Respect sentence boundaries while maintaining target chunk size
+
+**Implementation**:
+```python
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+splitter = RecursiveCharacterTextSplitter(
+    chunk_size=2048,  # ~512 words in characters
+    chunk_overlap=200,  # ~50 words in characters
+    separators=["\n\n", "\n", ". ", "! ", "? ", "; ", ": ", ", ", " "],
+    length_function=len,
+)
+```
+
+**How it works**:
+1. Try splitting by paragraphs (`\n\n`)
+2. If chunks too large, split by sentences (`. `, `! `, `? `)
+3. If still too large, split by clauses (`;`, `:`)
+4. Last resort: split by words
+
+**Pros**:
+- ✅ Preserves semantic boundaries (never breaks mid-sentence)
+- ✅ Maintains context coherence within chunks
+- ✅ Simple implementation (langchain library)
+- ✅ Configurable separators for different content types
+- ✅ Proven approach (used by major RAG systems)
+
+**Cons**:
+- ❌ Variable chunk sizes (not exactly 512 words, but close)
+- ❌ Adds dependency (langchain)
+- ❌ Slightly slower than naive splitting (~10-20ms per document)
+
+**Expected Impact**: 20-30% recall improvement
+
+#### Option C2: Hierarchical Context-Preserving Chunks
+
+**Description**: Create overlapping parent/child chunks
+
+**Structure**:
+```
+Document → Large parent chunks (1024 words) → Small child chunks (256 words)
+          ↓                                    ↓
+   Stored in Qdrant                       Searched first
+                                          Return parent context
+```
+
+**Implementation**:
+```python
+# Generate child chunks (searched)
+child_chunks = splitter.split_text(content, chunk_size=1024)
+
+# Generate parent chunks (context)
+parent_chunks = splitter.split_text(content, chunk_size=4096)
+
+# Store both with parent-child relationships
+for child_idx, child in enumerate(child_chunks):
+    parent_idx = find_parent(child_idx)
+    store_vector(
+        vector=embed(child),
+        payload={
+            "chunk": child,
+            "parent_chunk": parent_chunks[parent_idx],
+            "chunk_type": "child"
+        }
+    )
+```
+
+**Pros**:
+- ✅ Best of both worlds: precise matching + full context
+- ✅ Handles multi-hop information needs
+- ✅ Better for long documents (> 1000 words)
+
+**Cons**:
+- ❌ 2x storage (parent + child chunks)
+- ❌ More complex implementation
+- ❌ Higher indexing time (embed twice)
+- ❌ Query complexity (retrieve child, return parent)
+
+**Expected Impact**: 35-45% recall improvement (diminishing returns vs. complexity)
+
+**Verdict**: ⚠️ Consider only if Option C1 insufficient
+
+#### Option C3: Document Structure-Aware Chunking
+
+**Description**: Parse markdown/document structure before chunking
+
+**Implementation**:
+```python
+import mistune  # Markdown parser
+
+def structure_aware_chunk(markdown_content: str) -> list[str]:
+    ast = mistune.create_markdown(renderer='ast')(markdown_content)
+
+    chunks = []
+    for node in ast:
+        if node['type'] == 'heading':
+            # Start new chunk at each header
+            current_chunk = node['children'][0]['raw']
+        elif node['type'] == 'paragraph':
+            current_chunk += "\n" + node['children'][0]['raw']
+            if len(current_chunk) > 2048:
+                chunks.append(current_chunk)
+                current_chunk = ""
+
+    return chunks
+```
+
+**Pros**:
+- ✅ Respects document logical structure
+- ✅ Headers provide context for chunks
+- ✅ Works well for structured notes (documentation, meeting notes with sections)
+
+**Cons**:
+- ❌ Complex implementation (parser, AST traversal)
+- ❌ Markdown-specific (doesn't help calendar events, deck cards)
+- ❌ Variable chunk sizes (some sections very short/long)
+- ❌ Breaks for unstructured content
+
+**Expected Impact**: 15-25% improvement for structured content only
+
+**Verdict**: ⚠️ Future enhancement after Option C1
+
+#### Option C4: Fixed Sliding Window (Current Baseline)
+
+**Description**: Current naive word-based splitting
+
+**Verdict**: ❌ Superseded by Option C1
+
+### Embedding Model Strategies
+
+#### Option E1: Upgrade to Better General-Purpose Model (RECOMMENDED)
+
+**Description**: Switch to state-of-the-art embedding model
+
+**Candidates**:
+
+| Model | Dimensions | MTEB Score | Pros | Cons |
+|-------|-----------|------------|------|------|
+| **mxbai-embed-large** | 1024 | 64.68 | Best performance, good balance | Larger (slower) |
+| **nomic-embed-text-v1.5** | 768 | 62.39 | Upgraded version of current | Incremental improvement |
+| **bge-large-en-v1.5** | 1024 | 64.23 | Excellent for English | Not multilingual |
+| **nomic-embed-text** (current) | 768 | 60.10 | Baseline | Lower performance |
+
+**MTEB**: Massive Text Embedding Benchmark (higher = better semantic understanding)
+
+**Recommendation**: **mxbai-embed-large-v1**
+- Best MTEB score (64.68)
+- 1024 dimensions (richer semantic space)
+- Works well via Ollama
+- ~15-20% better retrieval quality in benchmarks
+
+**Implementation**:
+```python
+# config.py
+OLLAMA_EMBEDDING_MODEL = "mxbai-embed-large-v1"  # Changed from nomic-embed-text
+
+# ollama_provider.py
+async def get_dimension(self) -> int:
+    # Query Ollama for actual dimension instead of hardcoding
+    response = await self.client.post("/api/show", json={"name": self.model})
+    return response.json()["details"]["embedding_length"]
+```
+
+**Migration**:
+1. Deploy new model to Ollama
+2. Create new Qdrant collection (different dimension)
+3. Reindex all documents with new embeddings
+4. Swap collections atomically
+5. Delete old collection
+
+**Pros**:
+- ✅ Immediate quality improvement (15-20%)
+- ✅ Simple change (config + reindex)
+- ✅ No code complexity
+- ✅ Future-proof (state-of-the-art model)
+
+**Cons**:
+- ❌ Requires full reindex (2-4 hours for 1000 documents)
+- ❌ Larger model = slower embedding (~50ms vs. 30ms per chunk)
+- ❌ Higher dimensionality = more storage (~30% increase)
+
+**Expected Impact**: 15-25% recall improvement
+
+#### Option E2: Multi-Vector Embeddings (ColBERT-style)
+
+**Description**: Generate multiple embeddings per chunk (token-level)
+
+**Architecture**:
+```
+Chunk → Transformer → Token embeddings (e.g., 50 tokens × 128 dim) → Store all
+Query → Transformer → Token embeddings → MaxSim(query_tokens, doc_tokens)
+```
+
+**MaxSim scoring**:
+```python
+def maxsim_score(query_embeddings, doc_embeddings):
+    # For each query token, find max similarity with any doc token
+    scores = []
+    for q_emb in query_embeddings:
+        max_sim = max(cosine_similarity(q_emb, d_emb) for d_emb in doc_embeddings)
+        scores.append(max_sim)
+    return sum(scores)
+```
+
+**Pros**:
+- ✅ Best retrieval quality (state-of-the-art results)
+- ✅ Fine-grained matching (token-level)
+- ✅ Handles partial matches better
+
+**Cons**:
+- ❌ **50-100x storage increase** (50 vectors per chunk vs. 1)
+- ❌ **Slower search** (compute MaxSim for each candidate)
+- ❌ **Complex implementation** (custom scoring, storage schema)
+- ❌ **Requires specialized model** (ColBERTv2, not available in Ollama)
+
+**Expected Impact**: 40-50% improvement, but at very high cost
+
+**Verdict**: ❌ Too complex, too expensive for marginal gain over E1+C1
+
+#### Option E3: Fine-Tuned Domain-Specific Model
+
+**Description**: Fine-tune embedding model on Nextcloud corpus
+
+**Process**:
+1. Collect training data (query-document pairs)
+2. Fine-tune base model (e.g., `nomic-embed-text`) on domain data
+3. Deploy fine-tuned model via Ollama
+4. Reindex with fine-tuned embeddings
+
+**Training data needed**:
+- 1,000+ query-document pairs
+- Labeled relevance (positive/negative examples)
+- Representative of real usage
+
+**Pros**:
+- ✅ Optimized for specific content (notes, calendar, deck)
+- ✅ Better handling of domain terminology
+- ✅ Highest potential quality improvement (30-40%)
+
+**Cons**:
+- ❌ **Requires training data** (expensive to collect)
+- ❌ **GPU infrastructure** needed for fine-tuning
+- ❌ **Expertise required** (ML/NLP knowledge)
+- ❌ **Maintenance burden** (retrain as corpus evolves)
+- ❌ **Time investment**: 2-4 weeks initial setup
+
+**Expected Impact**: 30-40% improvement, but high cost
+
+**Verdict**: ⚠️ Consider only if E1+C1 insufficient AND have training data
+
+#### Option E4: Ensemble Embeddings
+
+**Description**: Generate embeddings with multiple models, combine scores
+
+**Implementation**:
+```python
+models = ["mxbai-embed-large-v1", "bge-large-en-v1.5"]
+
+# Index
+embeddings = [await embed(chunk, model) for model in models]
+store_multi_vector(embeddings)
+
+# Search
+query_embeddings = [await embed(query, model) for model in models]
+scores = [search(q_emb, model) for q_emb, model in zip(query_embeddings, models)]
+combined_score = 0.5 * scores[0] + 0.5 * scores[1]
+```
+
+**Pros**:
+- ✅ Robust to individual model weaknesses
+- ✅ Better coverage of semantic space
+
+**Cons**:
+- ❌ 2x storage and compute
+- ❌ Complex scoring and fusion
+- ❌ Marginal improvement (~5-10%) over single best model
+
+**Expected Impact**: 5-10% over best single model
+
+**Verdict**: ❌ Not worth complexity
+
+### Combined Strategies
+
+#### Option D1: Best Chunking + Best Embedding (RECOMMENDED)
+
+**Combination**: Option C1 (Semantic Chunking) + Option E1 (mxbai-embed-large-v1)
+
+**Expected Impact**:
+- Chunking: +20-30% recall
+- Embedding: +15-25% recall
+- **Combined**: +35-55% recall improvement (not strictly additive, but significant)
+
+**Cost**:
+- Development: 1-2 days
+- Reindex: 2-4 hours (one-time)
+- Ongoing: None (same infrastructure)
+
+**Pros**:
+- ✅ Addresses both root causes
+- ✅ Orthogonal improvements (chunking + embedding)
+- ✅ Simple implementation
+- ✅ No new infrastructure
+- ✅ Future-proof foundation for additional enhancements (reranking, hybrid search)
+
+**Cons**:
+- ❌ Requires full reindex (manageable)
+- ❌ Slightly higher storage (1024 vs. 768 dim)
+
+**Verdict**: ✅ **RECOMMENDED**
+
+## Decision
+
+**Adopt Option D1: Semantic Chunking + Upgraded Embedding Model**
+
+Implement both improvements together to maximize recall improvement:
+
+### 1. Semantic Sentence-Aware Chunking
+
+**Changes**:
+- Replace naive word splitting with `RecursiveCharacterTextSplitter`
+- Preserve sentence boundaries, paragraph structure
+- Maintain similar chunk sizes (~512 words / 2048 characters)
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/vector/document_chunker.py
+
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+class DocumentChunker:
+    """Chunk documents into semantically coherent pieces."""
+
+    def __init__(
+        self,
+        chunk_size: int = 2048,  # Characters, not words
+        chunk_overlap: int = 200,  # Characters, not words
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+
+        self.splitter = RecursiveCharacterTextSplitter(
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            separators=[
+                "\n\n",  # Paragraphs (highest priority)
+                "\n",    # Lines
+                ". ",    # Sentences
+                "! ",
+                "? ",
+                "; ",    # Clauses
+                ": ",
+                ", ",    # Phrases
+                " ",     # Words (last resort)
+            ],
+            length_function=len,
+            is_separator_regex=False,
+        )
+
+    def chunk_text(self, content: str) -> list[str]:
+        """
+        Chunk text while preserving semantic boundaries.
+
+        Args:
+            content: Full document text
+
+        Returns:
+            List of text chunks, each ending at a semantic boundary
+        """
+        if not content:
+            return []
+
+        # Use RecursiveCharacterTextSplitter for semantic boundaries
+        chunks = self.splitter.split_text(content)
+
+        return chunks
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old (word-based)
+DOCUMENT_CHUNK_SIZE: int = 512  # words
+DOCUMENT_CHUNK_OVERLAP: int = 50  # words
+
+# New (character-based, more precise)
+DOCUMENT_CHUNK_SIZE: int = 2048  # characters (~512 words)
+DOCUMENT_CHUNK_OVERLAP: int = 200  # characters (~50 words)
+```
+
+**Dependency** (`pyproject.toml`):
+```toml
+[project]
+dependencies = [
+    # ... existing dependencies
+    "langchain-text-splitters>=0.2.0",
+]
+```
+
+### 2. Upgrade Embedding Model
+
+**Changes**:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Dynamic dimension detection (query Ollama instead of hardcoding)
+- Create new Qdrant collection for new dimensions
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/embedding/ollama_provider.py
+
+class OllamaEmbeddingProvider(EmbeddingProvider):
+    def __init__(self, base_url: str, model: str, verify_ssl: bool = True):
+        self.base_url = base_url
+        self.model = model
+        self._dimension: int | None = None  # Changed: query dynamically
+        self.client = httpx.AsyncClient(base_url=base_url, verify=verify_ssl)
+
+    async def dimension(self) -> int:
+        """Get embedding dimension from Ollama API."""
+        if self._dimension is None:
+            try:
+                response = await self.client.post(
+                    "/api/show",
+                    json={"name": self.model},
+                    timeout=10.0,
+                )
+                response.raise_for_status()
+                info = response.json()
+                self._dimension = info.get("details", {}).get("embedding_length")
+
+                if self._dimension is None:
+                    # Fallback: generate test embedding to detect dimension
+                    test_emb = await self.embed("test")
+                    self._dimension = len(test_emb)
+
+            except Exception as e:
+                logger.warning(f"Failed to get dimension from Ollama: {e}, using fallback")
+                # Fallback dimensions by model name
+                if "mxbai-embed-large" in self.model:
+                    self._dimension = 1024
+                elif "nomic-embed-text" in self.model:
+                    self._dimension = 768
+                else:
+                    self._dimension = 768  # Default
+
+        return self._dimension
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old
+OLLAMA_EMBEDDING_MODEL: str = "nomic-embed-text"
+
+# New
+OLLAMA_EMBEDDING_MODEL: str = "mxbai-embed-large-v1"
+```
+
+**Environment Variable**:
+```bash
+OLLAMA_EMBEDDING_MODEL=mxbai-embed-large-v1
+```
+
+### 3. Migration Strategy
+
+**Reindexing Process**:
+
+```python
+# nextcloud_mcp_server/vector/migration.py
+
+async def migrate_to_new_embeddings():
+    """
+    Migrate from old embeddings to new embeddings.
+
+    Process:
+    1. Create new collection with new dimension
+    2. Reindex all documents with new embeddings
+    3. Atomic swap (update collection name in config)
+    4. Delete old collection
+    """
+    old_collection = "nextcloud_content"
+    new_collection = "nextcloud_content_v2"
+
+    # 1. Create new collection
+    await qdrant_client.create_collection(
+        collection_name=new_collection,
+        vectors_config=VectorParams(
+            size=1024,  # mxbai-embed-large-v1 dimension
+            distance=Distance.COSINE,
+        ),
+    )
+
+    # 2. Reindex all documents
+    logger.info("Starting reindex with new embeddings...")
+    scanner = VectorScanner(...)
+    processor = VectorProcessor(collection_name=new_collection, ...)
+
+    await scanner.scan_all()  # Rescans and re-embeds all documents
+
+    # 3. Wait for completion
+    while True:
+        status = await get_sync_status()
+        if status.pending_documents == 0:
+            break
+        await asyncio.sleep(5)
+
+    # 4. Atomic swap
+    # Update config to point to new collection
+    # (or use collection alias in Qdrant)
+    await qdrant_client.update_collection_aliases(
+        change_aliases_operations=[
+            CreateAliasOperation(
+                create_alias=CreateAlias(
+                    collection_name=new_collection,
+                    alias_name="nextcloud_content"
+                )
+            )
+        ]
+    )
+
+    # 5. Verify new collection works
+    test_results = await run_benchmark_queries()
+    if test_results.recall < baseline_recall:
+        # Rollback
+        logger.error("New embeddings worse than baseline, rolling back")
+        await rollback_migration()
+        return False
+
+    # 6. Delete old collection
+    await qdrant_client.delete_collection(old_collection)
+    logger.info("Migration complete!")
+    return True
+```
+
+**Downtime Mitigation**:
+- Use Qdrant collection aliases for atomic swap
+- Reindex can happen in background
+- Only brief downtime during alias swap (~1s)
+
+**Rollback Plan**:
+- Keep old collection until validation complete
+- If new embeddings worse, swap alias back to old collection
+- No data loss
+
+### 4. Validation & Benchmarking
+
+**Before/After Comparison**:
+
+```python
+# tests/benchmarks/chunking_embedding_comparison.py
+
+async def benchmark_chunking_embeddings():
+    """
+    Compare old vs. new chunking and embeddings on test queries.
+    """
+    test_queries = load_benchmark_queries()  # 100 queries with known relevant docs
+
+    # Baseline (current)
+    baseline_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content",  # Old: nomic-embed-text, word chunks
+    )
+
+    # New implementation
+    new_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content_v2",  # New: mxbai-embed-large-v1, semantic chunks
+    )
+
+    # Compare metrics
+    comparison = {
+        "baseline": {
+            "recall@10": calculate_recall(baseline_results, k=10),
+            "precision@10": calculate_precision(baseline_results, k=10),
+            "mrr": calculate_mrr(baseline_results),
+            "zero_result_rate": calculate_zero_result_rate(baseline_results),
+        },
+        "new": {
+            "recall@10": calculate_recall(new_results, k=10),
+            "precision@10": calculate_precision(new_results, k=10),
+            "mrr": calculate_mrr(new_results),
+            "zero_result_rate": calculate_zero_result_rate(new_results),
+        },
+        "improvement": {
+            "recall_improvement": (new_recall - baseline_recall) / baseline_recall,
+            "precision_improvement": (new_precision - baseline_precision) / baseline_precision,
+        }
+    }
+
+    return comparison
+```
+
+**Success Criteria**:
+- **Recall@10**: Improve from ~52% to ≥75% (+40% improvement)
+- **Precision@10**: Maintain ≥75% (no degradation)
+- **MRR**: Improve from 0.58 to ≥0.70
+- **Zero-result rate**: Reduce from 18% to ≤10%
+- **Indexing time**: Maintain ≤10s per document
+
+**Validation Process**:
+1. Run benchmark on baseline (current implementation)
+2. Implement changes
+3. Run benchmark on new implementation
+4. Compare metrics
+5. If improvement ≥40%, proceed to production
+6. If improvement <40%, investigate and iterate
+
+## Implementation Timeline
+
+### Week 1: Development & Testing
+
+**Day 1-2: Chunking Implementation**
+- [ ] Add langchain-text-splitters dependency
+- [ ] Refactor `document_chunker.py`
+- [ ] Update configuration (character-based chunk sizes)
+- [ ] Write unit tests for semantic boundaries
+- [ ] Validate: Chunks never break mid-sentence
+
+**Day 3-4: Embedding Implementation**
+- [ ] Update `ollama_provider.py` with dynamic dimension detection
+- [ ] Update configuration (new model name)
+- [ ] Deploy `mxbai-embed-large-v1` to Ollama
+- [ ] Test embedding generation with new model
+- [ ] Validate: Embeddings are 1024-dim
+
+**Day 5: Migration Script**
+- [ ] Write migration script (collection creation, reindexing, alias swap)
+- [ ] Test migration on staging environment
+- [ ] Validate: No data loss, atomic swap works
+
+### Week 2: Reindexing & Validation
+
+**Day 1-2: Staging Reindex**
+- [ ] Run full reindex on staging environment
+- [ ] Monitor indexing performance
+- [ ] Validate: All documents indexed correctly
+
+**Day 3: Benchmarking**
+- [ ] Run benchmark queries on old collection (baseline)
+- [ ] Run benchmark queries on new collection
+- [ ] Compare metrics (recall, precision, MRR)
+- [ ] Validate: ≥40% recall improvement
+
+**Day 4: Production Reindex**
+- [ ] Schedule maintenance window (optional, can run in background)
+- [ ] Run migration script on production
+- [ ] Monitor reindexing progress
+- [ ] Atomic swap when complete
+
+**Day 5: Production Validation**
+- [ ] Monitor search quality metrics
+- [ ] Collect user feedback
+- [ ] Compare production metrics to staging
+- [ ] Rollback if issues detected
+
+## Cost Analysis
+
+### Development Cost
+- **Time**: 1-2 weeks (implementation + validation)
+- **Effort**: 40-60 hours @ $100/hour = $4,000 - $6,000
+
+### Infrastructure Cost
+- **Storage**: +30% (1024-dim vs. 768-dim)
+  - Example: 1,000 notes × 3 chunks × 1024 dim × 4 bytes = 12 MB (negligible)
+- **Compute**: +20% embedding time (50ms vs. 30ms per chunk)
+  - Amortized over batch indexing, minimal impact
+- **No new infrastructure**: Uses existing Ollama + Qdrant
+
+### Reindexing Cost (One-Time)
+- **Time**: 2-4 hours for 1,000 documents
+  - 1,000 docs × 3 chunks × 50ms = 150 seconds (~2.5 minutes embedding)
+  - + Ollama processing time + Qdrant insertion
+- **Downtime**: ~1 second (atomic alias swap)
+
+### Total Cost
+- **Initial**: $4,000 - $6,000 (development + testing)
+- **Ongoing**: $0 (no new infrastructure or API costs)
+
+### ROI
+- **Recall improvement**: +40-60% (finding relevant documents)
+- **User satisfaction**: Reduced zero-result queries (18% → 10%)
+- **Foundation**: Enables future enhancements (reranking, hybrid search)
+- **Cost per % improvement**: $100 - $150 (excellent ROI)
+
+## Consequences
+
+### Positive
+
+1. **Addresses Root Causes**: Fixes fundamental issues (chunking, embeddings) not symptoms
+2. **High Impact**: Expected 40-60% recall improvement from foundational changes
+3. **Future-Proof**: Creates solid foundation for future enhancements (reranking, hybrid search, GraphRAG)
+4. **Simple**: No architectural changes, no new infrastructure
+5. **Orthogonal**: Improvements are independent, can be validated separately
+6. **Low Risk**: Proven techniques (RecursiveCharacterTextSplitter, mxbai-embed-large-v1)
+7. **Maintainable**: Standard libraries and models, easy to debug
+
+### Negative
+
+1. **Reindexing Required**: 2-4 hours one-time cost (manageable, can run in background)
+2. **Storage Increase**: +30% for higher-dimensional embeddings (12 MB vs. 9 MB for 1K docs)
+3. **Slower Indexing**: +20% embedding time (50ms vs. 30ms per chunk)
+4. **Dependency**: Adds langchain-text-splitters (minimal, well-maintained library)
+5. **Not a Complete Solution**: May still need reranking/hybrid search for optimal recall (but solid foundation)
+
+### Neutral
+
+1. **Model Lock-In**: Committed to mxbai-embed-large-v1, but can change later (another reindex)
+2. **Chunk Size Trade-offs**: ~512 words is heuristic, may need tuning for specific content types
+
+## Monitoring & Success Metrics
+
+### Real-Time Metrics (Grafana)
+
+**Search Quality**:
+- `semantic_search_recall_at_10` (target: ≥75%)
+- `semantic_search_precision_at_10` (target: ≥75%)
+- `semantic_search_mrr` (target: ≥0.70)
+- `semantic_search_zero_result_rate` (target: ≤10%)
+
+**Performance**:
+- `semantic_search_latency_ms` (p50, p95, p99)
+- `embedding_generation_time_ms`
+- `indexing_throughput_docs_per_sec`
+
+**Indexing**:
+- `documents_indexed_total`
+- `documents_pending`
+- `indexing_errors_total`
+
+### Weekly Validation
+
+**A/B Testing** (if gradual rollout):
+- 50% users: New embeddings
+- 50% users: Old embeddings
+- Compare metrics for 1 week
+- Full rollout if new embeddings superior
+
+**User Feedback**:
+- Survey: "How satisfied are you with search results?" (1-5 scale)
+- Track: Number of "search not working" support tickets
+- Monitor: User-reported false negatives ("I know this doc exists")
+
+### Rollback Criteria
+
+**Automatic Rollback** if:
+- Recall decreases by >10% from baseline
+- Error rate increases by >50%
+- Query latency increases by >100%
+
+**Manual Rollback** if:
+- User complaints increase significantly
+- Zero-result queries increase instead of decrease
+
+## Future Enhancements
+
+These improvements create a solid foundation. Future enhancements (in order of priority):
+
+1. **Cross-Encoder Reranking** (ADR-012)
+   - Two-stage retrieval: broad recall (50 candidates) → precise reranking (top 10)
+   - Expected: +15-20% additional recall improvement
+   - Builds on: Better embeddings retrieve better candidates to rerank
+
+2. **Hybrid Search** (ADR-013)
+   - Combine vector search + BM25 keyword search
+   - Expected: +10-15% additional recall (especially for exact matches)
+   - Builds on: Semantic chunks provide better keyword match context
+
+3. **Multi-App Indexing** (ADR-014)
+   - Index calendar, deck, files (currently notes-only)
+   - Expected: Expands searchable corpus 3-5x
+   - Builds on: Proven chunking and embedding strategy
+
+4. **GraphRAG** (ADR-015, conditional)
+   - Only if: Global thematic queries needed OR corpus >10K documents
+   - Expected: Relationship discovery, multi-hop reasoning
+   - Builds on: High-quality embeddings improve graph construction
+
+## References
+
+### Research Papers
+
+1. **RecursiveCharacterTextSplitter**
+   - LangChain Documentation: https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter
+   - Proven technique used by major RAG systems
+
+2. **MTEB Leaderboard** (Massive Text Embedding Benchmark)
+   - https://huggingface.co/spaces/mteb/leaderboard
+   - Comprehensive embedding model comparison
+
+3. **mxbai-embed-large**
+   - Model: https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
+   - Best general-purpose embedding model (MTEB: 64.68)
+
+### Related ADRs
+
+- **ADR-003**: Vector Database and Semantic Search Architecture (original implementation)
+- **ADR-008**: MCP Sampling for Multi-App Semantic Search with RAG (answer generation)
+
+### Tools & Libraries
+
+- **LangChain Text Splitters**: https://python.langchain.com/docs/modules/data_connection/document_transformers/
+- **Ollama Embedding Models**: https://ollama.ai/library
+- **Qdrant Collections**: https://qdrant.tech/documentation/concepts/collections/
+
+## Summary
+
+This ADR addresses the root causes of poor semantic search recall:
+
+1. **Better Chunking**: Semantic sentence-aware splitting (preserves context)
+2. **Better Embeddings**: Upgrade to mxbai-embed-large-v1 (richer semantic space)
+
+**Expected Impact**: 40-60% recall improvement with minimal cost and complexity.
+
+**Why This Approach**:
+- Fixes fundamentals before adding complexity
+- Proven techniques (not experimental)
+- Simple implementation (1-2 weeks)
+- Creates foundation for future enhancements
+- No new infrastructure or ongoing costs
+
+**Next Steps**: Approve ADR → Implement changes → Reindex → Validate → Production rollout
+
+## Implementation Status
+
+### Completed (2025-11-18)
+
+**✅ Semantic Markdown-Aware Chunking (Option C1 + C3 Hybrid)**
+
+Implementation details:
+- Replaced custom word-based chunking with `MarkdownTextSplitter` from LangChain
+- Optimized for Nextcloud Notes markdown content with special handling for:
+  - Headers (`#`, `##`, `###`, etc.)
+  - Code blocks (` ``` `)
+  - Lists (`-`, `*`, `1.`)
+  - Horizontal rules (`---`)
+  - Paragraphs and sentences
+- Maintained `ChunkWithPosition` interface for backward compatibility
+- Updated configuration defaults:
+  - `DOCUMENT_CHUNK_SIZE`: 512 words → 2048 characters
+  - `DOCUMENT_CHUNK_OVERLAP`: 50 words → 200 characters
+- Updated unit tests to verify position tracking and boundary preservation
+- All tests passing with markdown-aware character-based chunking
+
+**Files Modified**:
+- `nextcloud_mcp_server/vector/document_chunker.py` - LangChain integration
+- `nextcloud_mcp_server/config.py` - Character-based defaults
+- `tests/unit/test_document_chunker.py` - Updated test suite
+
+**Dependencies Added**:
+- `langchain-text-splitters>=1.0.0` (already present in `pyproject.toml`)
+
+**Migration Required**:
+- ⚠️ Full reindex required to apply new chunking strategy
+- Existing documents in vector database use old word-based chunks
+- See "Migration Strategy" section above for reindexing process
+
+### Pending
+
+**⏳ Embedding Model Upgrade (Option E1)**
+
+Still to be implemented:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Implement dynamic dimension detection in `ollama_provider.py`
+- Create migration script for collection reindexing
+- Run benchmarking to validate improvement
+- Deploy to production with atomic collection swap
+
+**Estimated Timeline**: 1-2 weeks for implementation and validation
@@ -0,0 +1,619 @@
+# ADR-012: Unified Multi-Algorithm Search with Client-Configurable Weighting
+
+## Status
+Proposed
+
+## Context
+
+### Current State
+
+The Nextcloud MCP server currently provides semantic search via vector similarity (Qdrant), as designed in ADR-003 and implemented through ADR-007. However, users and MCP clients have limited control over search behavior:
+
+1. **Single algorithm only**: Only pure vector similarity search is available
+2. **No algorithm selection**: MCP clients cannot choose between semantic, keyword, or fuzzy approaches
+3. **No weighting control**: Clients cannot adjust the balance between different search methods
+4. **Disconnected implementations**: Viz pane uses different search algorithms than MCP tools
+5. **Limited flexibility**: No way to optimize search for different use cases (exact match vs. conceptual similarity)
+
+### User Needs
+
+Different search scenarios require different algorithms:
+
+- **Exact match queries**: "Find note titled 'Q1 Budget'" → keyword search preferred
+- **Conceptual queries**: "What are my goals for next quarter?" → semantic search preferred
+- **Typo-tolerant queries**: "Find note about kuberntes" → fuzzy search needed
+- **Balanced queries**: "Find documentation about API endpoints" → hybrid search optimal
+
+Additionally, users need a **testing interface** (viz pane) to:
+- Experiment with different search algorithms on their own documents
+- Visualize search results and algorithm behavior
+- Tune weights for optimal results
+- Understand which algorithm works best for their queries
+
+### Technical Requirements
+
+1. **Unified interface**: Single MCP tool supporting multiple algorithms
+2. **Client control**: MCP clients specify algorithm and weights via tool parameters
+3. **Backward compatibility**: Existing `nc_semantic_search()` behavior preserved
+4. **Shared implementation**: Viz pane and MCP tools use identical search algorithms
+5. **User accessibility**: Viz pane available to all logged-in users with vector sync enabled
+6. **Performance**: Minimal overhead for algorithm selection
+
+## Decision
+
+We will implement a **unified multi-algorithm search architecture** with the following components:
+
+### Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                         MCP Client / User Browser                            │
+│                                                                               │
+│  ┌──────────────────────────┐         ┌──────────────────────────────────┐  │
+│  │   MCP Tool Call          │         │   Viz Pane (Browser UI)          │  │
+│  │                          │         │                                  │  │
+│  │ nc_semantic_search(      │         │ - Algorithm selector dropdown    │  │
+│  │   query="kubernetes",    │         │ - Weight adjustment sliders      │  │
+│  │   algorithm="hybrid",    │         │ - Interactive 2D scatter plot    │  │
+│  │   semantic_weight=0.5,   │         │ - Side-by-side comparison        │  │
+│  │   keyword_weight=0.3,    │         │ - Real-time search testing       │  │
+│  │   fuzzy_weight=0.2       │         │                                  │  │
+│  │ )                        │         │                                  │  │
+│  └───────────┬──────────────┘         └────────────┬─────────────────────┘  │
+└──────────────┼─────────────────────────────────────┼────────────────────────┘
+               │                                      │
+               │ MCP Protocol                         │ HTTPS (htmx)
+               │                                      │
+┌──────────────▼──────────────────────────────────────▼────────────────────────┐
+│                        MCP Server (/app endpoint)                             │
+│                                                                               │
+│  ┌─────────────────────────────────────────────────────────────────────────┐ │
+│  │              Unified Search Interface (server/semantic.py)              │ │
+│  │                                                                         │ │
+│  │  @mcp.tool() nc_semantic_search(algorithm, weights...)                 │ │
+│  │  ├─ Validate parameters (weights sum ≤1.0)                             │ │
+│  │  ├─ Dispatch to algorithm selector                                     │ │
+│  │  └─ Return ranked SearchResponse                                       │ │
+│  └────────────────────────────┬────────────────────────────────────────────┘ │
+│                                │                                              │
+│  ┌────────────────────────────▼────────────────────────────────────────────┐ │
+│  │              Algorithm Dispatcher (search/algorithms.py)                │ │
+│  │                                                                         │ │
+│  │  if algorithm == "semantic":    → semantic.py                          │ │
+│  │  if algorithm == "keyword":     → keyword.py                           │ │
+│  │  if algorithm == "fuzzy":       → fuzzy.py                             │ │
+│  │  if algorithm == "hybrid":      → hybrid.py (RRF fusion)               │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+│                                                                               │
+│  ┌──────────────────┐  ┌──────────────────┐  ┌──────────────────┐           │
+│  │  semantic.py     │  │  keyword.py      │  │  fuzzy.py        │           │
+│  │                  │  │                  │  │                  │           │
+│  │ • Query Qdrant   │  │ • Token matching │  │ • Char overlap   │           │
+│  │ • Cosine dist    │  │ • Title weight   │  │ • 70% threshold  │           │
+│  │ • Score ≥0.7     │  │ • ADR-001 logic  │  │ • Simple impl    │           │
+│  └────────┬─────────┘  └────────┬─────────┘  └────────┬─────────┘           │
+│           │                     │                      │                     │
+│           └─────────────────────┼──────────────────────┘                     │
+│                                 │                                            │
+│  ┌──────────────────────────────▼──────────────────────────────────────────┐ │
+│  │                    hybrid.py (Reciprocal Rank Fusion)                   │ │
+│  │                                                                         │ │
+│  │  1. Run algorithms in parallel (semantic, keyword, fuzzy)              │ │
+│  │  2. Collect ranked results from each                                   │ │
+│  │  3. Apply RRF formula: score = weight / (k + rank)                     │ │
+│  │  4. Combine scores across algorithms                                   │ │
+│  │  5. Re-rank by combined score                                          │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+└───────────────────────────────────┬───────────────────────────────────────────┘
+                                    │
+                    ┌───────────────┴───────────────┐
+                    │                               │
+         ┌──────────▼──────────┐         ┌─────────▼────────────┐
+         │ Qdrant Vector DB    │         │ Nextcloud APIs       │
+         │                     │         │                      │
+         │ • Vector search     │         │ • Access verification│
+         │ • user_id filter    │         │ • Full metadata fetch│
+         │ • Score threshold   │         │ • Permission checks  │
+         │ • 768-dim embeddings│         │                      │
+         └─────────────────────┘         └──────────────────────┘
+```
+
+### Data Flow
+
+#### MCP Tool Request
+```
+1. Client calls nc_semantic_search(query, algorithm="hybrid", weights...)
+2. Server validates parameters (weights sum ≤1.0)
+3. Dispatcher routes to hybrid.py
+4. Hybrid search runs semantic, keyword, fuzzy in parallel
+5. RRF combines results with weighted scores
+6. Access verification via Nextcloud API
+7. Return ranked SearchResponse to client
+```
+
+#### Viz Pane Request (Server-Side Processing)
+```
+1. User navigates to /app (Vector Visualization tab)
+2. Browser loads vector-viz fragment via htmx
+3. User enters query and adjusts algorithm/weights
+4. htmx sends request to /app/vector-viz endpoint
+5. Server executes search via search/algorithms.py:
+   - Filters by user_id (multi-tenant security)
+   - Applies selected algorithm (semantic/keyword/fuzzy/hybrid)
+   - Filters by document type (notes/files/calendar/contacts)
+   - Retrieves matching results + metadata
+6. Server performs PCA reduction (768-dim → 2D):
+   - Converts matching results to 2D coordinates
+   - Only sends coordinates + metadata (not full vectors)
+   - Dramatically reduces bandwidth (e.g., 768 floats → 2 floats per doc)
+7. Server returns JSON: {results: [...], coordinates_2d: [...], stats: {...}}
+8. Browser receives lightweight response
+9. Plotly.js renders interactive scatter plot
+10. Matching results highlighted (blue), non-matches grayed (40% opacity)
+```
+
+**Performance Benefits of Server-Side Processing**:
+- **Bandwidth reduction**: ~384x less data (2 floats vs 768 floats per document)
+- **Client efficiency**: Browser only handles visualization, not computation
+- **Scalability**: Can visualize 10,000+ documents without client-side lag
+- **Security**: Raw vectors never leave server
+- **Consistency**: Same search logic as MCP tool (no drift)
+
+### 1. Core Search Algorithms
+
+Four search algorithms will be available:
+
+#### a) Semantic Search (Vector Similarity)
+- **Method**: Cosine distance in 768-dimensional embedding space
+- **Implementation**: Qdrant `query_points` with user_id filtering
+- **Use case**: Conceptual queries, finding related content
+- **Current status**: Implemented in `nextcloud_mcp_server/server/semantic.py`
+
+#### b) Keyword Search (Token-Based)
+- **Method**: Token matching with weighted scoring (from ADR-001)
+- **Implementation**: Title matches weighted 3x higher than content
+- **Use case**: Exact phrase matching, known titles
+- **Current status**: Designed in ADR-001, not implemented
+
+#### c) Fuzzy Search (Character Overlap)
+- **Method**: Simple character-based similarity (70% threshold)
+- **Implementation**: Character set comparison (current viz pane approach)
+- **Use case**: Typo tolerance, approximate matching
+- **Current status**: Implemented in viz pane only
+
+#### d) Hybrid Search (Multi-Algorithm Fusion)
+- **Method**: Reciprocal Rank Fusion (RRF) from ADR-003
+- **Implementation**: Parallel execution + score combination
+- **Use case**: Balanced queries, general-purpose search
+- **Current status**: Designed in ADR-003, not implemented
+
+### 2. Unified MCP Tool Interface
+
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(
+    query: str,
+    ctx: Context,
+    limit: int = 10,
+    score_threshold: float = 0.7,
+    algorithm: Literal["semantic", "keyword", "fuzzy", "hybrid"] = "hybrid",
+    semantic_weight: float = 0.5,
+    keyword_weight: float = 0.3,
+    fuzzy_weight: float = 0.2,
+) -> SearchResponse:
+    """
+    Search Nextcloud content using configurable algorithms.
+
+    Args:
+        query: Natural language search query
+        ctx: MCP context for authentication
+        limit: Maximum results to return
+        score_threshold: Minimum similarity score (semantic/hybrid only)
+        algorithm: Search algorithm to use
+        semantic_weight: Weight for semantic results (hybrid only, default: 0.5)
+        keyword_weight: Weight for keyword results (hybrid only, default: 0.3)
+        fuzzy_weight: Weight for fuzzy results (hybrid only, default: 0.2)
+
+    Returns:
+        Ranked search results with scores and excerpts
+    """
+```
+
+**Key decisions**:
+- **Single tool name**: Keep `nc_semantic_search` for backward compatibility
+- **Algorithm parameter**: Explicit selection via enum
+- **Weight parameters**: Client-configurable, only apply to hybrid mode
+- **Validation**: Weights must sum to ≤1.0, enforced server-side
+- **Defaults**: Hybrid mode with balanced weights (semantic 50%, keyword 30%, fuzzy 20%)
+
+### 3. Shared Algorithm Implementation
+
+Extract search algorithms into reusable module:
+
+```
+nextcloud_mcp_server/
+├── search/
+│   ├── __init__.py
+│   ├── algorithms.py          # Core search implementations
+│   ├── semantic.py             # Vector similarity search
+│   ├── keyword.py              # Token-based search (ADR-001)
+│   ├── fuzzy.py                # Character overlap search
+│   └── hybrid.py               # RRF fusion (ADR-003)
+└── server/
+    └── semantic.py             # MCP tool wrapper
+```
+
+**Benefits**:
+- Viz pane and MCP tools share identical implementations
+- Testable in isolation
+- Easy to add new algorithms (e.g., BM25, neural reranking)
+- Clear separation of concerns
+
+### 4. Viz Pane Integration
+
+Update viz pane (`nextcloud_mcp_server/auth/userinfo_routes.py`) to:
+
+1. **Use shared algorithms**: Import from `search/algorithms.py`
+2. **Server-side filtering**: All search and filtering operations happen server-side
+   - Query execution via shared search backend
+   - Document type filtering (notes, files, calendar, contacts)
+   - User ID filtering for multi-tenant security
+   - Only matching results + metadata sent to client
+   - Reduces bandwidth and improves performance
+3. **PCA reduction**: Server performs dimensionality reduction (768-dim → 2D)
+   - Only 2D coordinates sent to browser for visualization
+   - Dramatically reduces data transfer vs sending full vectors
+   - Enables visualization of large document collections
+4. **User accessibility**: Available to all users with vector sync enabled
+5. **Security**: Filter results by `user_id` (only show user's own documents)
+6. **Interactive testing**: Allow users to:
+   - Select algorithm type
+   - Adjust weights (hybrid mode)
+   - Compare results across algorithms
+   - Visualize result distribution in 2D space
+
+#### Viz Pane UI Components
+
+```
+┌────────────────────────────────────────────────────────────────────────┐
+│ Vector Visualization                                          [Status] │
+├────────────────────────────────────────────────────────────────────────┤
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Configuration                                             │  │
+│ │                                                                  │  │
+│ │ Query: [_______________________________________________] [Search]│  │
+│ │                                                                  │  │
+│ │ Algorithm: [Hybrid ▼]  [Semantic] [Keyword] [Fuzzy]             │  │
+│ │                                                                  │  │
+│ │ Weights (Hybrid Mode):                                           │  │
+│ │   Semantic: [========50========] 0.5                             │  │
+│ │   Keyword:  [======30======    ] 0.3                             │  │
+│ │   Fuzzy:    [====20====        ] 0.2                             │  │
+│ │                                                                  │  │
+│ │ Document Types: ☑ Notes  ☑ Files  ☑ Calendar  ☑ Contacts        │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Vector Space Visualization (PCA 2D Projection)                   │  │
+│ │                                                                  │  │
+│ │        ▲                                                         │  │
+│ │    PC2 │     ●  ● ●      🔵 Matching results (full opacity)     │  │
+│ │        │  ●     ●  ●     ⚪ Non-matching results (40% opacity)   │  │
+│ │        │    🔵  ● ●                                              │  │
+│ │        │  ●  🔵  ●       Hover: Show document title + excerpt    │  │
+│ │        │  ● ●  🔵 ●      Click: Open document in Nextcloud       │  │
+│ │    ────┼──●─🔵──●─●────► PC1                                     │  │
+│ │        │   ● ●  ●                                                │  │
+│ │        │    🔵 ●   ●     Explained Variance:                     │  │
+│ │        │  ●    ●  ●      PC1: 23.4% | PC2: 18.7%                 │  │
+│ │        │     ● ●                                                 │  │
+│ │                                                                  │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Results (12 matching documents)                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Kubernetes Setup Guide                        Score: 0.87     │  │
+│ │    "...configure kubectl to connect to cluster..."              │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Container Orchestration Notes                 Score: 0.82     │  │
+│ │    "...deployment strategies for kubernetes..."                 │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 K8s Troubleshooting                           Score: 0.79     │  │
+│ │    "...common kuberntes errors and solutions..."                │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ [Show More Results...]                                           │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Algorithm Performance Comparison                                 │  │
+│ │                                                                  │  │
+│ │ Algorithm    │ Results │ Avg Score │ Time (ms) │ Precision     │  │
+│ │ ─────────────┼─────────┼───────────┼───────────┼───────────     │  │
+│ │ Semantic     │   45    │   0.78    │   145ms   │  ████░ 0.82   │  │
+│ │ Keyword      │   23    │   0.91    │    42ms   │  ███░░ 0.67   │  │
+│ │ Fuzzy        │   67    │   0.72    │    89ms   │  ██░░░ 0.45   │  │
+│ │ Hybrid (RRF) │   52    │   0.84    │   198ms   │  █████ 0.89   │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+```
+
+**Key UI Features**:
+
+1. **Search Input**: Real-time query testing with instant visualization
+2. **Algorithm Selector**: Dropdown + quick-select buttons
+3. **Weight Sliders**: Visual adjustment with live preview (hybrid mode only)
+4. **Document Type Filters**: Checkboxes for notes, files, calendar, contacts
+5. **2D Scatter Plot**: Interactive Plotly.js visualization
+   - Blue dots = matching documents (full opacity)
+   - Gray dots = non-matching documents (40% opacity)
+   - Hover = show title + excerpt tooltip
+   - Click = open document in Nextcloud
+   - Zoom/pan controls for exploration
+6. **Results Panel**: Ranked list with scores and excerpts
+7. **Performance Table**: Compare algorithm speed and accuracy
+8. **Explained Variance**: Show how much information PCA preserves
+
+**Technology Stack**:
+- **Frontend**: htmx for dynamic loading, Alpine.js for reactivity
+- **Visualization**: Plotly.js for interactive scatter plots
+- **Styling**: Tailwind CSS (consistent with existing /app UI)
+- **Backend**: Shared `search/algorithms.py` implementation
+
+### 5. Reciprocal Rank Fusion (RRF) for Hybrid Search
+
+Following ADR-003's design:
+
+```python
+def reciprocal_rank_fusion(
+    results: dict[str, list[SearchResult]],
+    weights: dict[str, float],
+    k: int = 60
+) -> list[SearchResult]:
+    """
+    Combine multiple ranked result lists using RRF.
+
+    Args:
+        results: Dict of algorithm_name -> ranked results
+        weights: Dict of algorithm_name -> weight (0-1)
+        k: RRF constant (default: 60, standard value)
+
+    Returns:
+        Combined and re-ranked results
+    """
+    scores = defaultdict(float)
+
+    for algo_name, algo_results in results.items():
+        weight = weights.get(algo_name, 0.0)
+        for rank, result in enumerate(algo_results, start=1):
+            # RRF formula: 1 / (k + rank)
+            rrf_score = weight / (k + rank)
+            scores[result.doc_id] += rrf_score
+
+    # Sort by combined score, return top results
+    return sorted(scores.items(), key=lambda x: x[1], reverse=True)
+```
+
+**RRF properties**:
+- **Rank-based**: Uses position, not raw scores (handles score scale differences)
+- **Proven effective**: Standard approach in information retrieval
+- **Configurable**: `k` parameter controls rank decay (default: 60)
+- **Weight support**: Allows algorithm-specific importance
+
+## Implementation Plan
+
+### Phase 1: Extract and Unify Algorithms (Week 1)
+
+1. Create `nextcloud_mcp_server/search/` module
+2. Implement `algorithms.py` with base interface
+3. Extract semantic search logic from `server/semantic.py`
+4. Implement keyword search from ADR-001 design
+5. Extract fuzzy search from viz pane
+6. Implement RRF hybrid search from ADR-003
+7. Add comprehensive unit tests for each algorithm
+
+### Phase 2: Update MCP Tool (Week 1-2)
+
+1. Add `algorithm` parameter to `nc_semantic_search()`
+2. Add weight parameters (`semantic_weight`, etc.)
+3. Implement algorithm dispatcher
+4. Add parameter validation (weights sum ≤1.0)
+5. Update response model to include algorithm metadata
+6. Maintain backward compatibility (default: hybrid)
+7. Add integration tests for all algorithm modes
+
+### Phase 3: Update Viz Pane (Week 2)
+
+**Critical: All processing must happen server-side**
+
+1. **Remove client-side search filtering**
+   - Delete JavaScript-based keyword/fuzzy matching
+   - Remove client-side document type filtering
+   - No search logic in browser
+2. **Implement server-side endpoint** (`/app/vector-viz`)
+   - Accept query, algorithm, weights, doc_type filters
+   - Execute search via `search/algorithms.py`
+   - Filter results by user_id (security)
+   - Perform PCA reduction (768-dim → 2D)
+   - Return JSON with 2D coordinates + metadata only
+3. **Update frontend**
+   - htmx form submission to `/app/vector-viz`
+   - Algorithm selector dropdown
+   - Weight adjustment sliders (htmx updates on change)
+   - Document type checkboxes
+   - Plotly.js visualization of server response
+4. **Performance optimization**
+   - Limit results to user's documents only
+   - Cache PCA transformation (invalidate on new vectors)
+   - Stream large result sets if needed
+   - Add loading indicators for server processing
+
+### Phase 4: Documentation and Testing (Week 2-3)
+
+1. Update MCP tool documentation
+2. Add algorithm selection guide
+3. Document weight tuning recommendations
+4. Add end-to-end tests (MCP + viz pane)
+5. Performance benchmarks for each algorithm
+6. Update CLAUDE.md with search patterns
+
+## Consequences
+
+### Positive
+
+1. **Flexibility**: MCP clients can optimize search for their use case
+2. **Unified implementation**: Single source of truth for search algorithms
+3. **User empowerment**: Viz pane enables query testing and tuning
+4. **Backward compatible**: Existing semantic search behavior preserved
+5. **Extensible**: Easy to add new algorithms (BM25, neural reranking)
+6. **Testable**: Each algorithm can be unit tested independently
+7. **Standards-based**: RRF is proven in production systems
+
+### Negative
+
+1. **Complexity**: More parameters for clients to understand
+2. **API surface**: Larger tool signature (8 parameters)
+3. **Performance**: Hybrid search requires multiple queries
+4. **Validation overhead**: Weight validation adds processing
+5. **Documentation burden**: Need to explain when to use each algorithm
+
+### Neutral
+
+1. **Weight defaults**: May need tuning based on user feedback
+2. **Algorithm performance**: Will vary by content type and query
+3. **Viz pane adoption**: Unknown if users will utilize testing interface
+
+## Alternatives Considered
+
+### Alternative 1: Separate Tools Per Algorithm
+
+```python
+@mcp.tool()
+async def nc_semantic_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure vector similarity search."""
+
+@mcp.tool()
+async def nc_keyword_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure keyword matching."""
+
+@mcp.tool()
+async def nc_hybrid_search(query: str, ctx: Context, weights: dict, ...) -> SearchResponse:
+    """Hybrid search with weights."""
+```
+
+**Rejected because**:
+- API proliferation (3+ tools instead of 1)
+- Harder to discover capabilities
+- Backward compatibility issues
+- DRY violation (repeated parameters)
+
+### Alternative 2: Server-Wide Configuration Only
+
+```python
+# .env configuration
+SEARCH_ALGORITHM=hybrid
+SEMANTIC_WEIGHT=0.5
+KEYWORD_WEIGHT=0.3
+FUZZY_WEIGHT=0.2
+```
+
+**Rejected because**:
+- No per-query flexibility
+- MCP clients cannot optimize for different tasks
+- Requires server restart for changes
+- User's requirement: "expose a way for users to override the default weights"
+
+### Alternative 3: Production-Grade Fuzzy (Levenshtein/RapidFuzz)
+
+**Rejected because**:
+- Adds external dependency
+- Simple character overlap performs adequately
+- Can always upgrade later if needed
+- User's preference: "Keep simple character overlap"
+
+## Related ADRs
+
+- **ADR-001**: Enhanced Note Search (keyword algorithm design)
+- **ADR-003**: Vector Database and Semantic Search (hybrid search + RRF design)
+- **ADR-007**: Background Vector Sync (semantic search implementation)
+- **ADR-008**: MCP Sampling for RAG (uses semantic search results)
+- **ADR-009**: Semantic Search OAuth Scope (security model)
+- **ADR-011**: Improving Semantic Search Quality (mentions future "ADR-013" for hybrid search)
+
+**This ADR supersedes**:
+- ADR-011's placeholder for "ADR-013: Hybrid Search"
+
+**This ADR implements**:
+- ADR-003's hybrid search design (previously unimplemented)
+- ADR-001's keyword search design (previously unimplemented)
+
+## References
+
+- **Reciprocal Rank Fusion**: Cormack, G. V., Clarke, C. L., & Buettcher, S. (2009). "Reciprocal rank fusion outperforms condorcet and individual rank learning methods." SIGIR '09.
+- **Vector Search**: Malkov, Y. A., & Yashunin, D. A. (2018). "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs." TPAMI.
+- **Hybrid Search Best Practices**: Qdrant documentation on hybrid search patterns
+- **MCP Protocol**: Model Context Protocol specification for tool design
+
+## Implementation Notes
+
+### Weight Validation
+
+```python
+def validate_weights(
+    semantic_weight: float,
+    keyword_weight: float,
+    fuzzy_weight: float
+) -> None:
+    """Validate hybrid search weights."""
+    if semantic_weight < 0 or keyword_weight < 0 or fuzzy_weight < 0:
+        raise ValueError("Weights must be non-negative")
+
+    total = semantic_weight + keyword_weight + fuzzy_weight
+    if total > 1.0:
+        raise ValueError(f"Weights sum to {total:.2f}, must be ≤1.0")
+
+    if total == 0.0:
+        raise ValueError("At least one weight must be > 0")
+```
+
+### Backward Compatibility
+
+The default behavior (`algorithm="hybrid"` with balanced weights) provides better results than current pure semantic search, while maintaining the same tool name and signature structure. Existing clients will automatically benefit from hybrid search without code changes.
+
+### Performance Considerations
+
+- **Semantic search**: ~50-200ms (vector DB query)
+- **Keyword search**: ~10-50ms (in-memory token matching)
+- **Fuzzy search**: ~20-100ms (character comparison)
+- **Hybrid search**: ~100-300ms (parallel execution + fusion)
+
+Parallel execution of algorithms minimizes hybrid search latency.
+
+### Security Model
+
+All algorithms respect the same security boundaries:
+1. **User filtering**: Qdrant queries filter by `user_id`
+2. **Access verification**: Results verified via Nextcloud API
+3. **OAuth scope**: `semantic:read` required for all algorithms
+4. **Viz pane**: Shows only current user's documents
+
+## Success Metrics
+
+1. **Adoption**: % of MCP clients using algorithm parameter
+2. **Performance**: Search latency percentiles (p50, p95, p99)
+3. **Quality**: User satisfaction with result relevance
+4. **Viz pane usage**: % of users accessing testing interface
+5. **Weight distribution**: Most common weight configurations
+
+## Future Enhancements
+
+1. **Additional algorithms**: BM25, TF-IDF, neural reranking
+2. **Auto-tuning**: Learn optimal weights per user
+3. **Query analysis**: Automatic algorithm selection based on query
+4. **Cross-app search**: Extend beyond notes to calendar, files, etc.
+5. **Feedback loop**: Use click-through rate to improve weights
@@ -0,0 +1,254 @@
+## ADR-013: RAG Evaluation Testing Framework
+
+**Status:** Proposed
+
+**Date:** 2025-11-15
+
+### Context
+
+The `nc_semantic_search_answer` tool implements a Retrieval-Augmented Generation (RAG) system where:
+1. **Retrieval**: Vector sync pipeline indexes Nextcloud documents (notes, calendar, contacts, etc.) into a vector database
+2. **Generation**: MCP client's LLM synthesizes answers from retrieved documents via MCP sampling (ADR-008)
+
+We need a testing framework to evaluate RAG system performance and identify whether failures occur in retrieval (wrong documents found) or generation (poor answer quality). This framework must use industry-standard evaluation methodologies while remaining practical to implement and maintain.
+
+To establish a baseline, we will use the **BeIR/nfcorpus** dataset (medical/biomedical corpus) with ~5,000 documents and established query/answer pairs.
+
+Homepage: https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/
+Download: https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/nfcorpus.zip
+
+### Decision
+
+We will implement a **two-part evaluation framework** that independently tests retrieval and generation quality using pytest fixtures.
+
+#### In Scope
+
+**1. Retrieval Evaluation**
+Tests the vector sync/embedding pipeline's ability to find relevant documents.
+
+- **Metric: Context Recall** (Did we retrieve documents containing the answer?)
+  - **Evaluation method**: Heuristic - Check if ground-truth document IDs appear in top-k retrieval results
+  - **Test**: Query → Semantic search → Assert expected doc IDs present
+
+**2. Generation Evaluation**
+Tests the MCP client LLM's ability to synthesize correct answers from retrieved context.
+
+- **Metric: Answer Correctness** (Is the generated answer factually correct?)
+  - **Evaluation method**: LLM-as-judge - Compare RAG answer against ground-truth answer
+  - **Test**: Query → `nc_semantic_search_answer` → LLM evaluates answer vs. ground truth (binary true/false)
+
+#### Out of Scope (Initial Implementation)
+
+- **Context Relevance/Precision**: Measuring irrelevant documents in retrieval results
+- **Faithfulness/Groundedness**: Detecting hallucinations not supported by retrieved context
+- **Answer Relevance**: Whether answer addresses the specific question asked
+- **Out-of-Scope Handling**: Testing "I don't know" responses when answer isn't in context
+- **Continuous benchmarking**: Automated tracking of metric trends over time
+- **Custom domain datasets**: Production-specific test data (medical corpus used initially)
+
+These remain valuable for future iterations but add complexity beyond our initial goals.
+
+#### Implementation
+
+**Test Structure**
+
+Location: `tests/rag_evaluation/`
+- `test_retrieval_quality.py` - Retrieval evaluation tests
+- `test_generation_quality.py` - Generation evaluation tests
+- `conftest.py` - Fixtures for test data, MCP clients, and evaluation LLMs
+
+**Required Pytest Fixtures**
+
+1. **`nfcorpus_test_data`** (session-scoped)
+   - Downloads/caches BeIR nfcorpus dataset at runtime
+   - Loads 5 pre-selected test queries with:
+     - Query text
+     - Pre-generated ground-truth answer (from `tests/rag_evaluation/fixtures/ground_truth.json`)
+     - Expected document IDs (from qrels with score=2)
+   - Uploads all corpus documents as notes in test Nextcloud instance
+   - Triggers vector sync to index documents
+   - Waits for indexing completion
+   - Returns test case data structure
+
+2. **`mcp_sampling_client`** (session-scoped)
+   - Creates MCP client that supports sampling
+   - Configurable LLM provider (ollama or anthropic) via environment:
+     - `RAG_EVAL_PROVIDER=ollama` (default) or `anthropic`
+     - `RAG_EVAL_OLLAMA_BASE_URL=http://localhost:11434`
+     - `RAG_EVAL_OLLAMA_MODEL=llama3.1:8b`
+     - `RAG_EVAL_ANTHROPIC_API_KEY=sk-...`
+     - `RAG_EVAL_ANTHROPIC_MODEL=claude-3-5-sonnet-20241022`
+   - Returns configured MCP client fixture
+
+3. **`evaluation_llm`** (session-scoped)
+   - Separate LLM instance for evaluation (independent from MCP client)
+   - Same provider configuration as `mcp_sampling_client`
+   - Returns callable: `async def evaluate(prompt: str) -> str`
+
+**Test Implementation Examples**
+
+```python
+# tests/rag_evaluation/test_retrieval_quality.py
+async def test_retrieval_recall(nc_client, nfcorpus_test_data):
+    """Test that semantic search retrieves documents containing the answer."""
+    for test_case in nfcorpus_test_data:
+        # Perform semantic search (retrieval only, no generation)
+        results = await nc_client.notes.semantic_search(
+            query=test_case.query,
+            limit=10
+        )
+
+        retrieved_doc_ids = {r.document_id for r in results}
+        expected_doc_ids = set(test_case.expected_document_ids)
+
+        # Context Recall: Are expected documents in top-k results?
+        recall = len(expected_doc_ids & retrieved_doc_ids) / len(expected_doc_ids)
+        assert recall >= 0.8, f"Recall {recall} below threshold for query: {test_case.query}"
+
+
+# tests/rag_evaluation/test_generation_quality.py
+async def test_answer_correctness(mcp_sampling_client, evaluation_llm, nfcorpus_test_data):
+    """Test that RAG system generates factually correct answers."""
+    for test_case in nfcorpus_test_data:
+        # Execute full RAG pipeline (retrieval + generation)
+        result = await mcp_sampling_client.call_tool(
+            "nc_semantic_search_answer",
+            arguments={"query": test_case.query, "limit": 5}
+        )
+
+        rag_answer = result["generated_answer"]
+
+        # LLM-as-judge evaluation
+        evaluation_prompt = f"""Compare these two answers and respond with only TRUE or FALSE.
+
+Question: {test_case.query}
+
+Generated Answer: {rag_answer}
+
+Ground Truth Answer: {test_case.ground_truth}
+
+Are these answers semantically equivalent (do they convey the same factual information)?
+Respond with only: TRUE or FALSE"""
+
+        evaluation_result = await evaluation_llm(evaluation_prompt)
+
+        assert evaluation_result.strip().upper() == "TRUE", \
+            f"Answer mismatch for query: {test_case.query}\nGot: {rag_answer}\nExpected: {test_case.ground_truth}"
+```
+
+**Dataset Integration**
+
+The BeIR nfcorpus dataset structure:
+- **corpus.jsonl**: 3,633 medical/biomedical documents (articles from PubMed)
+- **queries.jsonl**: 3,237 queries (questions)
+- **qrels/*.tsv**: Relevance judgments mapping query IDs to document IDs with scores (2=highly relevant, 1=somewhat relevant)
+
+**Important**: The dataset provides relevance judgments (which documents answer which queries) but does NOT include ground truth answers. We must generate synthetic ground truth offline.
+
+**Selected Test Queries** (5 diverse candidates):
+
+1. **PLAIN-2630**: "Alkylphenol Endocrine Disruptors and Allergies" (5 words, 21 highly relevant docs)
+2. **PLAIN-2660**: "How Long to Detox From Fish Before Pregnancy?" (8 words, 20 highly relevant docs)
+3. **PLAIN-2510**: "Coffee and Artery Function" (4 words, 16 highly relevant docs)
+4. **PLAIN-2430**: "Preventing Brain Loss with B Vitamins?" (6 words, 15 highly relevant docs)
+5. **PLAIN-2690**: "Chronic Headaches and Pork Tapeworms" (5 words, 14 highly relevant docs)
+
+**Ground Truth Generation** (offline, pre-test):
+
+Ground truth answers will be generated offline using a script that:
+1. Loads nfcorpus dataset
+2. For each selected query, extracts top 3-5 highly relevant documents
+3. Uses an LLM (ollama/anthropic) to synthesize a reference answer
+4. Stores ground truth in `tests/rag_evaluation/fixtures/ground_truth.json`
+
+```python
+# tools/generate_rag_ground_truth.py
+async def generate_ground_truth(query: str, relevant_docs: List[dict], llm: LLMProvider) -> str:
+    """Generate synthetic ground truth answer from highly relevant documents."""
+    context = "\n\n".join([
+        f"Document {i+1}:\nTitle: {doc['title']}\n{doc['text']}"
+        for i, doc in enumerate(relevant_docs[:5])
+    ])
+
+    prompt = f"""Based on the following documents, provide a comprehensive answer to this question:
+
+Question: {query}
+
+{context}
+
+Provide a factual, well-structured answer that synthesizes information from the documents.
+Focus on accuracy and completeness."""
+
+    return await llm.generate(prompt, max_tokens=500)
+```
+
+**Dataset Loading at Test Runtime** (in `nfcorpus_test_data` fixture):
+
+1. Download nfcorpus dataset (cached in pytest temp directory)
+2. Load corpus, queries, and qrels (relevance judgments)
+3. Load pre-generated ground truth from `tests/rag_evaluation/fixtures/ground_truth.json`
+4. Upload all corpus documents as Nextcloud notes
+5. Trigger vector sync to index documents
+6. Wait for indexing completion
+7. Return test cases with query, ground truth, and expected doc IDs
+
+**LLM Provider Abstraction**
+
+```python
+# tests/rag_evaluation/llm_providers.py
+class LLMProvider(Protocol):
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str: ...
+
+class OllamaProvider:
+    def __init__(self, base_url: str, model: str):
+        self.base_url = base_url
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        # Use httpx to call Ollama API
+        ...
+
+class AnthropicProvider:
+    def __init__(self, api_key: str, model: str):
+        self.client = anthropic.AsyncAnthropic(api_key=api_key)
+        self.model = model
+
+    async def generate(self, prompt: str, max_tokens: int = 100) -> str:
+        message = await self.client.messages.create(
+            model=self.model,
+            max_tokens=max_tokens,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        return message.content[0].text
+```
+
+### Consequences
+
+**Positive:**
+
+* **Actionable debugging**: Separate retrieval/generation tests pinpoint failure location
+* **Industry-standard metrics**: Context Recall and Answer Correctness are recognized RAG evaluation metrics
+* **Simple initial implementation**: Binary LLM evaluation (true/false) is straightforward to implement and interpret
+* **Extensible framework**: Easy to add more metrics (faithfulness, relevance) later
+* **Standardized benchmark**: nfcorpus provides objective comparison against published RAG systems
+* **Hybrid evaluation**: Combines efficiency (heuristics for retrieval) with quality (LLM-as-judge for generation)
+* **Provider flexibility**: Supports both local (Ollama) and cloud (Anthropic) LLM evaluation
+
+**Negative:**
+
+* **Medical domain bias**: nfcorpus is medical/biomedical content, may not represent production use cases (personal notes, calendar events, etc.)
+* **Manual test execution**: Tests require external LLM access and are not integrated into CI pipeline
+* **Limited initial coverage**: Starting with only 5 queries provides limited statistical confidence
+* **Evaluation cost**: LLM-as-judge for generation evaluation incurs API costs (Anthropic) or requires local inference (Ollama)
+* **Single metric per component**: Initial scope tests only one metric per component, missing other important quality dimensions
+* **Synthetic ground truth**: Ground truth answers are LLM-generated, not human-validated, which may introduce evaluation bias
+* **Large corpus upload**: Uploading 3,633 documents at test runtime may be slow; caching strategy needed
+
+**Future Work:**
+
+* Expand to 50-100 queries for statistical significance
+* Add custom test dataset with production-representative documents (meeting notes, task lists, etc.)
+* Implement additional metrics (faithfulness, context relevance, answer relevance)
+* Create automated benchmarking dashboard to track metric trends
+* Test multi-hop reasoning (synthesis questions requiring multiple documents)
+* Evaluate out-of-scope handling ("I don't know" responses)
@@ -0,0 +1,241 @@
+# ADR-014: Replace Custom Keyword Search with BM25 Hybrid Search via Qdrant
+
+**Date:** 2025-11-16
+
+**Status:** Implemented
+
+---
+
+### 1. Context
+
+Our RAG application currently employs two separate retrieval mechanisms:
+1.  **Dense (Semantic) Search:** Using vector embeddings stored in our Qdrant database to find semantically similar context.
+2.  **Keyword Search:** A custom-built fuzzy/character-based search to match-specific keywords, acronyms, and product codes that semantic search often misses.
+
+This dual-system approach has several drawbacks:
+* **Poor Relevance:** Our current keyword search is basic (e.g., `LIKE` queries or simple fuzzy matching). It is not as effective as modern full-text search algorithms like BM25.
+* **Clunky Fusion:** We lack a robust, principled method to combine the results from the two systems. This leads to disjointed logic in the application layer and suboptimal context being passed to the LLM.
+* **Architectural Complexity:** We must maintain two separate search pathways (one to Qdrant, one to the keyword search mechanism), increasing code complexity and maintenance overhead.
+
+Our vector database, **Qdrant**, natively supports **hybrid search** by combining dense vectors with BM25-based **sparse vectors** in a single collection.
+
+### 2. Decision
+
+We will **deprecate and remove** the existing custom keyword/fuzzy search functionality.
+
+We will **replace it by implementing native hybrid search within Qdrant**. This involves:
+1.  **Modifying the Qdrant Collection:** Updating our collection to support a named sparse vector index configured for BM25.
+2.  **Updating the Ingestion Pipeline:** For every document chunk, we will generate and upsert *both*:
+    * Its **dense vector** (from our existing embedding model).
+    * Its **sparse vector** (generated using a BM25-compatible model, e.g., `Qdrant/bm25` from `fastembed`).
+3.  **Refactoring Retrieval Logic:** All retrieval calls will be consolidated into a single Qdrant query using the `query_points` endpoint. This query will use the `prefetch` parameter to execute both dense and sparse searches, and Qdrant's built-in **Reciprocal Rank Fusion (RRF)** to automatically merge the results into a single, relevance-ranked list.
+4.  **Backfilling:** A one-time migration script will be created to generate and add sparse vectors for all existing documents in the Qdrant collection.
+
+---
+
+### 3. Considered Options
+
+#### Option 1: Native Qdrant Hybrid Search (Chosen)
+* Use Qdrant's built-in sparse vector and RRF capabilities.
+* **Pros:**
+    * **Consolidated Architecture:** Manages both dense and sparse indexes in one database.
+    * **No Data Sync Issues:** Updates are atomic. A single `upsert` updates both representations.
+    * **Built-in Fusion:** RRF is handled natively and efficiently by the database.
+    * **Superior Relevance:** Replaces our brittle custom search with the industry-standard BM25.
+* **Cons:**
+    * Requires a one-time data backfill which may be time-consuming.
+    * Adds a new step (sparse vector generation) to the ingestion pipeline.
+
+#### Option 2: External Full-Text Search (e.g., Elasticsearch)
+* Keep Qdrant for dense search and add a separate Elasticsearch/OpenSearch cluster for BM25.
+* **Pros:**
+    * Provides a very powerful, dedicated full-text search engine.
+* **Cons:**
+    * **High Complexity:** Introduces a new, stateful service to deploy, manage, and scale.
+    * **Data Sync Nightmare:** We would be responsible for ensuring that the document IDs and content in Qdrant and Elasticsearch are always perfectly synchronized. This is a major source of bugs.
+    * **Manual Fusion:** The application would have to query both systems and perform RRF manually.
+
+#### Option 3: Keep Current System
+* Make no changes.
+* **Pros:**
+    * No engineering effort required.
+* **Cons:**
+    * Fails to address the known relevance and architectural problems.
+    * Our RAG application's performance will remain suboptimal, especially for keyword-sensitive queries.
+
+---
+
+### 4. Rationale
+
+**Option 1 is the clear winner.** It directly solves our primary problem (poor keyword matching) by adopting the industry-standard BM25.
+
+Critically, it achieves this while **simplifying** our overall architecture, not complicating it. By leveraging features already present in our existing database (Qdrant), we avoid the massive operational and synchronization overhead of adding a second search system (Option 2).
+
+This decision consolidates our retrieval logic, eliminates the data consistency problem, and moves the complex fusion logic (RRF) from the application layer into the database, where it can be performed more efficiently.
+
+### 5. Consequences
+
+**New Work:**
+* **Ingestion:** The data ingestion pipeline must be updated to add the `fastembed` library (or similar), generate sparse vectors, and upsert them to the new named vector field in Qdrant.
+* **Retrieval:** The application's retrieval service must be refactored to use the `query_points` endpoint with `prefetch` and `fusion=models.Fusion.RRF`.
+* **Migration:** A one-time backfill script must be written and executed to add sparse vectors for all existing documents.
+* **Infrastructure:** The Qdrant collection schema must be updated (or re-created) to add the `sparse_vectors_config`.
+
+**Positive:**
+* **Improved Accuracy:** Retrieval will be significantly more accurate, handling both semantic and keyword queries robustly.
+* **Simplified Code:** The application's retrieval logic will be cleaner and simpler, with one endpoint instead of two.
+* **Reduced Maintenance:** We will remove the custom fuzzy-search code, which is brittle and difficult to maintain.
+
+**Negative:**
+* The data backfill process will require careful management to avoid downtime.
+* Ingestion time will slightly increase due to the extra step of sparse vector generation. This is considered a negligible trade-off for the gains in relevance.
+
+---
+
+### 6. Implementation Notes
+
+**Implementation completed on 2025-11-16**
+
+**Key Changes:**
+
+1. **Dependencies** (pyproject.toml:25):
+   - Added `fastembed>=0.4.2` for BM25 sparse vector embeddings
+   - Adjusted `pillow` version constraint to be compatible with fastembed
+
+2. **Qdrant Collection Schema** (nextcloud_mcp_server/vector/qdrant_client.py:113-128):
+   - Updated to named vectors: `{"dense": VectorParams(...), "sparse": SparseVectorParams(...)}`
+   - Added sparse vector configuration with BM25 index
+   - Maintains backward compatibility with existing collections (detects legacy schema)
+
+3. **BM25 Embedding Provider** (nextcloud_mcp_server/embedding/bm25_provider.py):
+   - Created `BM25SparseEmbeddingProvider` using FastEmbed's `Qdrant/bm25` model
+   - Implements `encode()` and `encode_batch()` methods
+   - Returns sparse vectors as `{indices: list[int], values: list[float]}` format
+
+4. **Document Indexing Pipeline** (nextcloud_mcp_server/vector/processor.py:229-255):
+   - Generates both dense (semantic) and sparse (BM25) embeddings for each document chunk
+   - Updates `PointStruct` to use named vectors: `vector={"dense": ..., "sparse": ...}`
+   - Maintains same chunking strategy (512 words, 50-word overlap)
+
+5. **BM25 Hybrid Search Algorithm** (nextcloud_mcp_server/search/bm25_hybrid.py):
+   - Implements `BM25HybridSearchAlgorithm` using Qdrant's native RRF fusion
+   - Uses `prefetch` parameter for parallel dense + sparse search
+   - Applies `fusion=models.Fusion.RRF` for automatic result merging
+   - Maintains same deduplication and filtering logic as semantic search
+
+6. **MCP Tool Updates** (nextcloud_mcp_server/server/semantic.py:39-68):
+   - Simplified `nc_semantic_search()` to use BM25 hybrid only
+   - Removed `algorithm`, `semantic_weight`, `keyword_weight`, `fuzzy_weight` parameters
+   - Updated default `score_threshold=0.0` for RRF scoring
+   - Returns `search_method="bm25_hybrid"` in responses
+
+7. **Legacy Algorithm Removal**:
+   - Deleted `nextcloud_mcp_server/search/keyword.py` (278 lines)
+   - Deleted `nextcloud_mcp_server/search/fuzzy.py` (220 lines)
+   - Deleted `nextcloud_mcp_server/search/hybrid.py` (238 lines - custom RRF)
+   - Updated `nextcloud_mcp_server/search/__init__.py` to export only BM25 hybrid
+
+**Migration Strategy:**
+- No migration required (vector sync feature is experimental)
+- New documents automatically indexed with both dense + sparse vectors
+- Collection re-creation on first startup with updated schema
+
+**Test Results:**
+- All unit tests passing (118 passed)
+- All integration tests passing (7 semantic search tests)
+- Code formatting verified with ruff
+
+**Benefits Realized:**
+- ✅ Consolidated architecture (single Qdrant database for both dense + sparse)
+- ✅ Native fusion algorithms (database-level, more efficient)
+- ✅ Industry-standard BM25 (replaces custom keyword search)
+- ✅ Simplified codebase (removed 736 lines of legacy code)
+- ✅ Better relevance (handles both semantic and keyword queries)
+- ✅ Configurable fusion methods (RRF and DBSF)
+
+---
+
+### 7. Fusion Algorithm Options
+
+**Update: 2025-11-16**
+
+The BM25 hybrid search now supports two fusion algorithms for combining dense (semantic) and sparse (BM25) search results:
+
+#### Reciprocal Rank Fusion (RRF)
+
+**Default fusion method.** RRF is a widely-used, well-established algorithm that combines rankings from multiple retrieval systems using the reciprocal rank formula:
+
+```
+RRF(doc) = Σ 1/(k + rank_i(doc))
+```
+
+where `k` is a constant (typically 60) and `rank_i(doc)` is the rank of the document in retrieval system `i`.
+
+**Characteristics:**
+- ✅ **General-purpose**: Works well across diverse query types and document collections
+- ✅ **Rank-based**: Focuses on relative rankings rather than absolute scores
+- ✅ **Established**: Well-tested, documented, and understood in IR literature
+- ✅ **Robust**: Less sensitive to score distribution differences between systems
+
+**When to use RRF:**
+- Default choice for most use cases
+- When you have mixed query types (semantic + keyword)
+- When retrieval systems have very different score ranges
+- When you want predictable, well-understood behavior
+
+#### Distribution-Based Score Fusion (DBSF)
+
+**Alternative fusion method.** DBSF normalizes scores from each retrieval system using distribution statistics before combining them:
+
+1. **Normalization**: For each query, calculates mean (μ) and standard deviation (σ) of scores
+2. **Outlier handling**: Uses μ ± 3σ as normalization bounds
+3. **Fusion**: Sums normalized scores across systems
+
+**Characteristics:**
+- ✅ **Score-aware**: Uses actual relevance scores, not just rankings
+- ✅ **Statistical**: Normalizes based on score distribution properties
+- ⚠️ **Experimental**: Newer algorithm, less battle-tested than RRF
+- ⚠️ **Sensitive**: May behave differently depending on score distributions
+
+**When to use DBSF:**
+- When retrieval systems have vastly different score ranges that RRF doesn't balance well
+- When you want to experiment with score-based (vs rank-based) fusion
+- When statistical normalization better matches your use case
+- For A/B testing against RRF to measure retrieval quality improvements
+
+#### Configuration
+
+Both fusion algorithms are exposed via the `fusion` parameter in MCP tools:
+
+```python
+# Use RRF (default)
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf"  # Can be omitted, RRF is default
+)
+
+# Use DBSF
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="dbsf"
+)
+```
+
+The `nc_semantic_search_answer` tool also supports the `fusion` parameter and passes it through to the underlying search.
+
+#### Future: Configurable Weights
+
+**Current limitation**: Neither RRF nor DBSF currently support per-system weights (e.g., 0.8 for semantic, 0.2 for BM25). This is a Qdrant platform limitation tracked in [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067).
+
+When Qdrant adds weight support, the `fusion` parameter can be extended to accept weight configurations:
+
+```python
+# Hypothetical future API
+response = await nc_semantic_search(
+    query="async programming",
+    fusion="rrf",
+    fusion_weights={"dense": 0.7, "sparse": 0.3}  # Not yet implemented
+)
+```
+
+**Recommendation**: Start with RRF (default). If you encounter cases where keyword matches are under- or over-weighted, experiment with DBSF. Monitor [qdrant/qdrant#6067](https://github.com/qdrant/qdrant/issues/6067) for configurable weight support.
@@ -0,0 +1,380 @@
+# ADR-015: Unified Provider Architecture for Embeddings and Text Generation
+
+**Status:** Accepted
+**Date:** 2025-01-16
+**Deciders:** Development Team
+**Related:** ADR-003 (Vector Database), ADR-008 (MCP Sampling), ADR-013 (RAG Evaluation)
+
+## Context
+
+Prior to this refactoring, the codebase had two separate provider systems:
+
+1. **Embedding Providers** (`nextcloud_mcp_server/embedding/`)
+   - Used `EmbeddingProvider` ABC with methods: `embed()`, `embed_batch()`, `get_dimension()`
+   - Had auto-detection via `EmbeddingService._detect_provider()`
+   - Used for semantic search and vector indexing (production)
+
+2. **LLM Providers** (`tests/rag_evaluation/llm_providers.py`)
+   - Used `LLMProvider` Protocol with method: `generate()`
+   - Had separate factory function `create_llm_provider()`
+   - Used only for RAG evaluation tests (not production)
+
+This fragmentation created several problems:
+
+### Problems with Dual Provider Systems
+
+1. **Code Duplication**
+   - Ollama configuration appeared in both `embedding/service.py` and `tests/rag_evaluation/llm_providers.py`
+   - Similar provider detection logic in multiple places
+   - Separate singleton patterns for each system
+
+2. **Limited Extensibility**
+   - Hard-coded provider detection in `EmbeddingService._detect_provider()`
+   - No support for providers that offer both capabilities (like Bedrock)
+   - Adding new providers required modifying multiple files
+
+3. **Inconsistent Patterns**
+   - BM25 provider didn't follow `EmbeddingProvider` ABC
+   - Different method names across providers (`embed` vs `encode`)
+   - ABC vs Protocol for type checking
+
+4. **Difficult Scaling**
+   - Adding Amazon Bedrock (our third provider) would exacerbate all issues
+   - No clear path for future providers (OpenAI, Cohere, etc.)
+
+### Amazon Bedrock Requirements
+
+Bedrock naturally supports **both** embeddings and text generation:
+- **Embeddings**: `amazon.titan-embed-text-v1/v2`, `cohere.embed-*`
+- **Text Generation**: `anthropic.claude-*`, `meta.llama3-*`, `amazon.titan-text-*`
+- **Unified API**: Single `invoke_model()` method via bedrock-runtime
+
+This made it the perfect opportunity to establish a unified provider architecture.
+
+## Decision
+
+We refactored the provider infrastructure to use a **unified Provider ABC** with optional capabilities:
+
+### 1. Unified Provider Interface
+
+**New Structure:**
+```
+nextcloud_mcp_server/providers/
+├── __init__.py
+├── base.py              # Provider ABC with optional capabilities
+├── registry.py          # Auto-detection and factory
+├── ollama.py            # Supports both embedding + generation
+├── anthropic.py         # Generation only
+├── bedrock.py           # Supports both embedding + generation
+└── simple.py            # Embedding only (testing fallback)
+```
+
+**Base Class (`providers/base.py`):**
+```python
+class Provider(ABC):
+    @property
+    @abstractmethod
+    def supports_embeddings(self) -> bool:
+        """Whether this provider supports embedding generation."""
+        pass
+
+    @property
+    @abstractmethod
+    def supports_generation(self) -> bool:
+        """Whether this provider supports text generation."""
+        pass
+
+    @abstractmethod
+    async def embed(self, text: str) -> list[float]:
+        """Generate embedding (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+        """Generate batch embeddings (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    def get_dimension(self) -> int:
+        """Get embedding dimension (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def generate(self, prompt: str, max_tokens: int = 500) -> str:
+        """Generate text (raises NotImplementedError if not supported)."""
+        pass
+
+    @abstractmethod
+    async def close(self) -> None:
+        """Close provider and release resources."""
+        pass
+```
+
+### 2. Provider Registry
+
+**Auto-Detection Priority** (`providers/registry.py`):
+```python
+class ProviderRegistry:
+    @staticmethod
+    def create_provider() -> Provider:
+        # 1. Bedrock (AWS_REGION or BEDROCK_*_MODEL)
+        # 2. Ollama (OLLAMA_BASE_URL)
+        # 3. Simple (fallback)
+```
+
+**Environment Variables:**
+
+**Bedrock:**
+- `AWS_REGION`: AWS region (e.g., "us-east-1")
+- `AWS_ACCESS_KEY_ID`: AWS access key (optional, uses credential chain)
+- `AWS_SECRET_ACCESS_KEY`: AWS secret key (optional)
+- `BEDROCK_EMBEDDING_MODEL`: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
+- `BEDROCK_GENERATION_MODEL`: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
+
+**Ollama:**
+- `OLLAMA_BASE_URL`: Ollama API base URL (e.g., "http://localhost:11434")
+- `OLLAMA_EMBEDDING_MODEL`: Model for embeddings (default: "nomic-embed-text")
+- `OLLAMA_GENERATION_MODEL`: Model for text generation (e.g., "llama3.2:1b")
+- `OLLAMA_VERIFY_SSL`: Verify SSL certificates (default: "true")
+
+**Simple (no configuration, fallback):**
+- `SIMPLE_EMBEDDING_DIMENSION`: Embedding dimension (default: 384)
+
+### 3. Backward Compatibility
+
+**Old Code Continues to Work:**
+```python
+# Old way (still works)
+from nextcloud_mcp_server.embedding import get_embedding_service
+
+service = get_embedding_service()  # Returns singleton Provider
+embeddings = await service.embed_batch(texts)
+```
+
+**New Way (recommended):**
+```python
+# New way (cleaner)
+from nextcloud_mcp_server.providers import get_provider
+
+provider = get_provider()  # Returns singleton Provider
+embeddings = await provider.embed_batch(texts)
+
+# Can also use generation if provider supports it
+if provider.supports_generation:
+    text = await provider.generate("prompt")
+```
+
+**Migration Path:**
+- `embedding/service.py` now wraps `providers.get_provider()` for compatibility
+- `tests/rag_evaluation/llm_providers.py` now uses unified providers
+- Old imports still work, marked as deprecated in docstrings
+
+### 4. Amazon Bedrock Implementation
+
+**Features:**
+- Supports both embeddings and text generation
+- Model-specific request/response handling for:
+  - Titan Embed (amazon.titan-embed-text-*)
+  - Cohere Embed (cohere.embed-*)
+  - Claude (anthropic.claude-*)
+  - Llama (meta.llama3-*)
+  - Titan Text (amazon.titan-text-*)
+  - Mistral (mistral.*)
+- Uses boto3 bedrock-runtime client
+- Graceful degradation if boto3 not installed
+- Async implementation matching existing patterns
+
+**Model-Specific Handling:**
+```python
+# Bedrock embedding request (Titan)
+{"inputText": text}
+
+# Bedrock generation request (Claude)
+{
+    "anthropic_version": "bedrock-2023-05-31",
+    "max_tokens": max_tokens,
+    "temperature": 0.7,
+    "messages": [{"role": "user", "content": prompt}]
+}
+```
+
+## Consequences
+
+### Positive
+
+1. **Sustainable Provider Additions**
+   - New providers only need to implement `Provider` ABC
+   - Auto-detection via environment variables
+   - No modifications to existing code required
+
+2. **Code Consolidation**
+   - Single provider interface instead of two
+   - Unified configuration pattern
+   - Eliminated duplication
+
+3. **Better Extensibility**
+   - Providers can support one or both capabilities
+   - Clear capability detection via properties
+   - Registry pattern simplifies auto-detection
+
+4. **Improved Testing**
+   - RAG evaluation can use any provider (Ollama, Anthropic, Bedrock)
+   - Comprehensive unit tests for all providers
+   - Mocked boto3 tests for Bedrock
+
+5. **Production-Ready Bedrock Support**
+   - Full embedding and generation support
+   - Multiple model families supported
+   - AWS credential chain integration
+
+### Neutral
+
+1. **Optional Boto3 Dependency**
+   - boto3 is dev dependency only (not required for core functionality)
+   - Bedrock provider gracefully fails if boto3 not installed
+   - Users who want Bedrock must `pip install boto3`
+
+2. **Capability Properties**
+   - All providers must implement capability properties
+   - Methods raise `NotImplementedError` if capability not supported
+   - Clear error messages guide users to alternatives
+
+### Negative
+
+1. **Migration Effort**
+   - Existing code must be migrated to new imports (optional, backward compatible)
+   - Documentation needs updating
+   - Users must learn new environment variables
+
+2. **Increased Complexity**
+   - Provider base class has more methods (embedding + generation)
+   - More environment variables to configure
+   - Capability detection adds runtime checks
+
+## Implementation
+
+### Files Created
+
+**New Provider Infrastructure:**
+- `nextcloud_mcp_server/providers/__init__.py`
+- `nextcloud_mcp_server/providers/base.py`
+- `nextcloud_mcp_server/providers/registry.py`
+- `nextcloud_mcp_server/providers/ollama.py`
+- `nextcloud_mcp_server/providers/anthropic.py`
+- `nextcloud_mcp_server/providers/bedrock.py`
+- `nextcloud_mcp_server/providers/simple.py`
+
+**Tests:**
+- `tests/unit/providers/__init__.py`
+- `tests/unit/providers/test_bedrock.py` (9 unit tests)
+
+**Documentation:**
+- `docs/ADR-015-unified-provider-architecture.md` (this file)
+
+### Files Modified
+
+**Backward Compatibility:**
+- `nextcloud_mcp_server/embedding/service.py` - Now wraps `get_provider()`
+- `tests/rag_evaluation/llm_providers.py` - Uses unified providers
+
+**Dependencies:**
+- `pyproject.toml` - Added `boto3>=1.35.0` to dev dependencies
+
+### Testing Results
+
+**Unit Tests:** 127 passed (including 9 new Bedrock tests)
+**Type Checking:** All checks passed (ty)
+**Linting:** All checks passed (ruff)
+**Backward Compatibility:** Verified - existing embedding tests work
+
+## Alternatives Considered
+
+### Alternative 1: Keep Separate Provider Systems
+
+**Pros:**
+- No refactoring needed
+- Simpler short-term
+
+**Cons:**
+- Bedrock would need to be implemented twice
+- Continued code duplication
+- No long-term scalability
+
+**Decision:** Rejected - technical debt would continue to grow
+
+### Alternative 2: Separate Embedding and Generation Providers
+
+Use composition instead of unified interface:
+```python
+class CombinedProvider:
+    def __init__(self, embedding: EmbeddingProvider, generation: LLMProvider):
+        self.embedding = embedding
+        self.generation = generation
+```
+
+**Pros:**
+- Clearer separation of concerns
+- Simpler individual providers
+
+**Cons:**
+- Bedrock and Ollama naturally do both - artificial separation
+- More complex configuration (two providers to configure)
+- More boilerplate code
+
+**Decision:** Rejected - unified interface better matches provider capabilities
+
+### Alternative 3: Plugin System
+
+Dynamic provider registration via entry points:
+```python
+# setup.py
+entry_points={
+    'nextcloud_mcp.providers': [
+        'ollama = nextcloud_mcp_server.providers.ollama:OllamaProvider',
+        'bedrock = nextcloud_mcp_server.providers.bedrock:BedrockProvider',
+    ]
+}
+```
+
+**Pros:**
+- Most extensible
+- Third-party providers possible
+
+**Cons:**
+- Over-engineered for current needs
+- Added complexity
+- No immediate benefit
+
+**Decision:** Deferred - can add later if needed
+
+## Future Work
+
+1. **Additional Providers**
+   - OpenAI (embeddings + generation)
+   - Cohere (embeddings + generation)
+   - Google Vertex AI
+   - Azure OpenAI
+
+2. **Provider Features**
+   - Streaming generation support
+   - Batch API optimization (when available)
+   - Model-specific optimizations
+   - Cost tracking and metrics
+
+3. **Configuration Improvements**
+   - Provider profiles (development, production)
+   - Model aliasing (e.g., "small", "large")
+   - Fallback provider chains
+
+4. **Testing**
+   - Integration tests with real Bedrock endpoints
+   - Performance benchmarking across providers
+   - Cost comparison analysis
+
+## References
+
+- [boto3 Bedrock Runtime Documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Amazon Bedrock User Guide](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-bedrock.html)
+- ADR-003: Vector Database and Semantic Search
+- ADR-008: MCP Sampling for Semantic Search
+- ADR-013: RAG Evaluation Framework
@@ -0,0 +1,492 @@
+# ADR-016: Smithery Stateless Deployment for Multi-User Public Nextcloud Instances
+
+**Status:** Proposed
+**Date:** 2025-01-22
+**Deciders:** Development Team
+**Related:** ADR-004 (OAuth), ADR-007 (Background Vector Sync), ADR-015 (Unified Provider)
+
+## Context
+
+[Smithery](https://smithery.ai) is a hosting platform and marketplace for MCP servers that provides:
+
+- **Discovery**: Marketplace listing for MCP servers
+- **Hosting**: Containerized deployment with auto-scaling
+- **Authentication UI**: OAuth flow presentation for users
+- **Session Configuration**: Per-user settings passed via URL parameters
+- **Observability**: Usage logs and monitoring
+
+### Current Architecture Limitations
+
+The current nextcloud-mcp-server architecture assumes a **self-hosted deployment** with:
+
+1. **Persistent Infrastructure**
+   - Qdrant vector database for semantic search
+   - Background sync worker for content indexing
+   - Refresh token storage for offline access
+
+2. **Single-Tenant Configuration**
+   - Environment variables configure one Nextcloud instance
+   - `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`
+   - Or OAuth with a single IdP
+
+3. **Stateful Operations**
+   - Vector sync maintains index state across requests
+   - Token storage persists between sessions
+
+### Smithery Hosting Constraints
+
+Smithery-hosted containers are **stateless by design**:
+
+- No persistent storage between requests
+- No background workers or cron jobs
+- No databases (Qdrant, Redis, etc.)
+- Containers may be recycled at any time
+- Configuration passed per-session via URL parameters
+
+### Opportunity
+
+Many users have **publicly accessible Nextcloud instances** and want to:
+
+1. Try the MCP server without self-hosting infrastructure
+2. Connect multiple users to different Nextcloud instances
+3. Use basic Nextcloud tools without semantic search
+4. Benefit from Smithery's discovery and OAuth UI
+
+## Decision
+
+Implement a **stateless deployment mode** for Smithery that:
+
+1. **Disables stateful features** (vector sync, semantic search)
+2. **Creates clients per-session** from Smithery configuration
+3. **Supports multiple Nextcloud instances** via session config
+4. **Provides a useful subset of tools** that work without infrastructure
+
+### Architecture
+
+```
+┌─────────────────────────────────────────────────────────────────────────┐
+│                    Smithery-Hosted Stateless Mode                        │
+├─────────────────────────────────────────────────────────────────────────┤
+│                                                                          │
+│  MCP Client                    Smithery                                  │
+│  (Cursor, Claude)              Infrastructure                            │
+│        │                            │                                    │
+│        │ 1. Connect                 │                                    │
+│        ├───────────────────────────►│                                    │
+│        │                            │                                    │
+│        │ 2. Config UI               │                                    │
+│        │◄───────────────────────────┤  User enters:                      │
+│        │    (Smithery presents)     │  - nextcloud_url                   │
+│        │                            │  - auth_mode (basic/oauth)         │
+│        │                            │  - credentials                     │
+│        │ 3. Tool call               │                                    │
+│        ├───────────────────────────►│                                    │
+│        │    + session config        │                                    │
+│        │                            │                                    │
+│        │                    ┌───────┴───────┐                            │
+│        │                    │  MCP Server   │                            │
+│        │                    │  Container    │                            │
+│        │                    │               │                            │
+│        │                    │ 4. Create     │                            │
+│        │                    │    client     │                            │
+│        │                    │    from       │                            │
+│        │                    │    config     │                            │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │                    │ 5. Call       │                            │
+│        │                    │    Nextcloud  │───────► User's Nextcloud   │
+│        │                    │    API        │         Instance           │
+│        │                    │      │        │                            │
+│        │                    │      ▼        │                            │
+│        │ 6. Response        │ Return result │                            │
+│        │◄───────────────────┤               │                            │
+│        │                    └───────────────┘                            │
+│                                                                          │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
+### Session Configuration Schema
+
+```python
+from pydantic import BaseModel, Field
+
+class SmitheryConfigSchema(BaseModel):
+    """Configuration schema for Smithery session."""
+
+    # Required: Nextcloud instance
+    nextcloud_url: str = Field(
+        ...,
+        description="Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+    )
+
+    # Authentication mode
+    auth_mode: str = Field(
+        "app_password",
+        description="Authentication method: 'app_password' or 'oauth'"
+    )
+
+    # App Password authentication (recommended for Smithery)
+    username: str | None = Field(
+        None,
+        description="Nextcloud username (required for app_password auth)"
+    )
+    app_password: str | None = Field(
+        None,
+        description="Nextcloud app password (Settings → Security → App passwords)"
+    )
+
+    # OAuth authentication (advanced)
+    # When auth_mode='oauth', Smithery handles the OAuth flow
+    # and passes the access token automatically
+```
+
+### Feature Matrix
+
+| Feature | Self-Hosted | Smithery Stateless |
+|---------|-------------|-------------------|
+| **Notes** | | |
+| List/Search notes | ✓ | ✓ |
+| Get/Create/Update notes | ✓ | ✓ |
+| Semantic search | ✓ | ✗ |
+| **Calendar** | | |
+| List calendars | ✓ | ✓ |
+| Get/Create events | ✓ | ✓ |
+| **Contacts** | | |
+| List address books | ✓ | ✓ |
+| Search/Get contacts | ✓ | ✓ |
+| **Files (WebDAV)** | | |
+| List/Download files | ✓ | ✓ |
+| Upload files | ✓ | ✓ |
+| Search files | ✓ | ✓ (keyword only) |
+| **Deck** | | |
+| List boards/cards | ✓ | ✓ |
+| Create/Update cards | ✓ | ✓ |
+| **Tables** | | |
+| List/Query tables | ✓ | ✓ |
+| Create/Update rows | ✓ | ✓ |
+| **Cookbook** | | |
+| List/Get recipes | ✓ | ✓ |
+| **Semantic Search** | | |
+| Vector search | ✓ | ✗ |
+| RAG answers | ✓ | ✗ |
+| **Background Sync** | | |
+| Auto-indexing | ✓ | ✗ |
+| Webhook sync | ✓ | ✗ |
+| **Admin UI (`/app`)** | | |
+| Vector sync status | ✓ | ✗ |
+| Vector visualization | ✓ | ✗ |
+| Webhook management | ✓ | ✗ |
+| Session management | ✓ | ✗ |
+
+### Implementation
+
+#### 1. Deployment Mode Detection
+
+```python
+# nextcloud_mcp_server/config.py
+
+class DeploymentMode(Enum):
+    SELF_HOSTED = "self_hosted"      # Full features, env-based config
+    SMITHERY_STATELESS = "smithery"  # Stateless, session-based config
+
+def get_deployment_mode() -> DeploymentMode:
+    """Detect deployment mode from environment."""
+    if os.getenv("SMITHERY_DEPLOYMENT") == "true":
+        return DeploymentMode.SMITHERY_STATELESS
+    return DeploymentMode.SELF_HOSTED
+```
+
+#### 2. Session-Based Client Factory
+
+```python
+# nextcloud_mcp_server/context.py
+
+async def get_client(ctx: Context) -> NextcloudClient:
+    """Get NextcloudClient - from session config or environment."""
+
+    mode = get_deployment_mode()
+
+    if mode == DeploymentMode.SMITHERY_STATELESS:
+        # Create client from Smithery session config
+        config = ctx.session_config
+        if not config:
+            raise McpError("Session configuration required")
+
+        return NextcloudClient(
+            base_url=config.nextcloud_url,
+            username=config.username,
+            password=config.app_password,
+        )
+    else:
+        # Existing behavior: from environment or OAuth context
+        return await _get_client_from_context(ctx)
+```
+
+#### 3. Conditional Tool Registration
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_mcp_server(mode: DeploymentMode) -> FastMCP:
+    """Create MCP server with mode-appropriate tools."""
+
+    mcp = FastMCP("Nextcloud MCP")
+
+    # Always register core tools
+    configure_notes_tools(mcp)
+    configure_calendar_tools(mcp)
+    configure_contacts_tools(mcp)
+    configure_webdav_tools(mcp)
+    configure_deck_tools(mcp)
+    configure_tables_tools(mcp)
+    configure_cookbook_tools(mcp)
+
+    # Only register stateful tools in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        configure_semantic_tools(mcp)  # Requires Qdrant
+        register_oauth_tools(mcp)       # Requires token storage
+
+    return mcp
+```
+
+#### 4. Exclude Admin UI Routes
+
+The `/app` admin UI should **not be installed** in Smithery mode because:
+
+- **Vector sync status** - No vector sync in stateless mode
+- **Vector visualization** - No Qdrant to visualize
+- **Webhook management** - No webhook sync without background workers
+- **Session management** - No persistent sessions to manage
+
+```python
+# nextcloud_mcp_server/app.py
+
+def create_app(mode: DeploymentMode) -> Starlette:
+    """Create Starlette app with mode-appropriate routes."""
+
+    routes = [
+        Route("/health/live", health_live, methods=["GET"]),
+        Route("/health/ready", health_ready, methods=["GET"]),
+    ]
+
+    # Only mount admin UI in self-hosted mode
+    if mode == DeploymentMode.SELF_HOSTED:
+        browser_app = create_browser_app()
+        routes.append(
+            Route("/app", lambda r: RedirectResponse("/app/", status_code=307))
+        )
+        routes.append(Mount("/app", app=browser_app))
+        logger.info("Admin UI mounted at /app")
+    else:
+        logger.info("Admin UI disabled in Smithery stateless mode")
+
+    # Mount FastMCP at root
+    mcp_app = create_mcp_server(mode).streamable_http_app()
+    routes.append(Mount("/", app=mcp_app))
+
+    return Starlette(routes=routes, lifespan=starlette_lifespan)
+```
+
+**Endpoints by Mode:**
+
+| Endpoint | Self-Hosted | Smithery |
+|----------|-------------|----------|
+| `/mcp` | ✓ | ✓ |
+| `/health/live` | ✓ | ✓ |
+| `/health/ready` | ✓ | ✓ |
+| `/.well-known/mcp-config` | ✓ | ✓ |
+| `/app` | ✓ | ✗ |
+| `/app/vector-sync/status` | ✓ | ✗ |
+| `/app/vector-viz` | ✓ | ✗ |
+| `/app/webhooks` | ✓ | ✗ |
+
+#### 5. Smithery Integration Files
+
+**smithery.yaml:**
+```yaml
+runtime: "container"
+build:
+  dockerfile: "Dockerfile.smithery"
+  dockerBuildPath: "."
+startCommand:
+  type: "http"
+  configSchema:
+    type: "object"
+    required: ["nextcloud_url", "username", "app_password"]
+    properties:
+      nextcloud_url:
+        type: "string"
+        title: "Nextcloud URL"
+        description: "Your Nextcloud instance URL (e.g., https://cloud.example.com)"
+      username:
+        type: "string"
+        title: "Username"
+        description: "Your Nextcloud username"
+      app_password:
+        type: "string"
+        title: "App Password"
+        description: "Generate at Settings → Security → App passwords"
+  exampleConfig:
+    nextcloud_url: "https://cloud.example.com"
+    username: "alice"
+    app_password: "xxxxx-xxxxx-xxxxx-xxxxx-xxxxx"
+```
+
+**Dockerfile.smithery:**
+```dockerfile
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install uv
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
+
+# Copy project files
+COPY pyproject.toml uv.lock ./
+COPY nextcloud_mcp_server ./nextcloud_mcp_server
+
+# Install dependencies (without vector/semantic extras)
+RUN uv sync --frozen --no-dev
+
+# Set Smithery mode
+ENV SMITHERY_DEPLOYMENT=true
+ENV VECTOR_SYNC_ENABLED=false
+
+# Smithery sets PORT=8081
+EXPOSE 8081
+
+CMD ["uv", "run", "python", "-m", "nextcloud_mcp_server.smithery_main"]
+```
+
+**nextcloud_mcp_server/smithery_main.py:**
+```python
+"""Smithery-specific entrypoint for stateless deployment."""
+
+import os
+import uvicorn
+from starlette.middleware.cors import CORSMiddleware
+
+from nextcloud_mcp_server.app import create_mcp_server
+from nextcloud_mcp_server.config import DeploymentMode
+
+def main():
+    # Force stateless mode
+    os.environ["SMITHERY_DEPLOYMENT"] = "true"
+    os.environ["VECTOR_SYNC_ENABLED"] = "false"
+
+    mcp = create_mcp_server(DeploymentMode.SMITHERY_STATELESS)
+    app = mcp.streamable_http_app()
+
+    # Add CORS for browser-based clients
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["GET", "POST", "OPTIONS"],
+        allow_headers=["*"],
+        expose_headers=["mcp-session-id", "mcp-protocol-version"],
+    )
+
+    # Smithery sets PORT environment variable
+    port = int(os.environ.get("PORT", 8081))
+    uvicorn.run(app, host="0.0.0.0", port=port)
+
+if __name__ == "__main__":
+    main()
+```
+
+### Security Considerations
+
+1. **App Passwords over User Passwords**
+   - Smithery config encourages app passwords (revocable, scoped)
+   - Documentation guides users to create dedicated app passwords
+   - App passwords can be revoked without changing main password
+
+2. **HTTPS Required**
+   - `nextcloud_url` must be HTTPS for production use
+   - Validation rejects HTTP URLs in Smithery mode
+
+3. **No Credential Storage**
+   - Credentials exist only for request duration
+   - No server-side persistence of user credentials
+   - Smithery handles secure config transmission
+
+4. **Scope Limitation**
+   - Stateless mode cannot access offline_access
+   - No background operations on user's behalf
+   - Clear user expectation: tools work during session only
+
+### Migration Path
+
+Users can start with Smithery stateless mode and migrate to self-hosted:
+
+1. **Try on Smithery** → Basic tools, no setup
+2. **Self-host for semantic search** → Add Qdrant, enable vector sync
+3. **Full deployment** → Background sync, webhooks, multi-user OAuth
+
+## Consequences
+
+### Positive
+
+1. **Lower barrier to entry** - Users can try without infrastructure
+2. **Multi-user support** - Each session connects to different Nextcloud
+3. **Smithery ecosystem** - Discovery, observability, OAuth UI
+4. **Clear feature tiers** - Stateless (simple) vs self-hosted (full)
+
+### Negative
+
+1. **No semantic search** - Key differentiator unavailable on Smithery
+2. **Per-request auth** - Credentials sent with each request
+3. **No offline access** - Cannot perform background operations
+4. **Maintenance burden** - Two deployment modes to support
+
+### Neutral
+
+1. **Feature subset** - May encourage users to self-host for full features
+2. **Documentation needs** - Clear guidance on mode differences required
+
+## Alternatives Considered
+
+### 1. External MCP Only
+
+**Approach:** Only support self-hosted external MCP registration on Smithery.
+
+**Rejected because:**
+- Higher barrier to entry for new users
+- Misses opportunity for Smithery marketplace visibility
+- Users want to try before committing to infrastructure
+
+### 2. Embedded Vector DB (SQLite-vec)
+
+**Approach:** Use SQLite with vector extensions for per-request indexing.
+
+**Rejected because:**
+- No persistence between requests anyway
+- Indexing latency too high for synchronous requests
+- Complexity without benefit in stateless context
+
+### 3. External Vector DB Service
+
+**Approach:** Connect to Pinecone/Weaviate Cloud from Smithery container.
+
+**Rejected because:**
+- Adds external dependency and cost
+- Per-user collections require complex multi-tenancy
+- Sync still impossible without background workers
+
+### 4. Hybrid: Smithery + User's Qdrant
+
+**Approach:** User provides their own Qdrant URL in session config.
+
+**Considered for future:**
+- Could enable semantic search for advanced users
+- Adds complexity to session config
+- Sync still requires external trigger (manual or webhook)
+
+## References
+
+- [Smithery Documentation](https://smithery.ai/docs)
+- [Smithery Session Configuration](https://smithery.ai/docs/build/session-config)
+- [Smithery External MCPs](https://smithery.ai/docs/build/external)
+- [MCP Streamable HTTP Transport](https://modelcontextprotocol.io/docs/concepts/transports)
+- [Nextcloud App Passwords](https://docs.nextcloud.com/server/latest/user_manual/en/session_management.html#app-passwords)
@@ -0,0 +1,506 @@
+# ADR-017: Add MCP Tool Annotations for Enhanced Client UX
+
+## Status
+
+Implemented
+
+## Context
+
+The MCP Python SDK supports tool annotations that provide behavioral hints and improved UX to MCP clients. Currently, our 101 tools across 10 modules lack these annotations, resulting in:
+
+- Snake_case function names displayed to users (e.g., "nc_notes_create_note" instead of "Create Note")
+- No behavioral hints for clients about read-only, destructive, or idempotent operations
+- Missing parameter descriptions for better auto-completion and inline help
+- Clients cannot optimize caching, warn before destructive operations, or retry safely
+
+### Available MCP Annotations
+
+The MCP SDK provides three types of annotations:
+
+#### 1. Tool Decorator Parameters
+```python
+@mcp.tool(
+    title="Human-Readable Name",
+    description="Tool description",  # Can also come from docstring
+    annotations=ToolAnnotations(...),
+    icons=[Icon(...)]  # Optional visual icons
+)
+```
+
+#### 2. ToolAnnotations Behavioral Hints
+```python
+from mcp.types import ToolAnnotations
+
+ToolAnnotations(
+    title="Alternative Title",  # Decorator title takes precedence
+    readOnlyHint=True,         # Tool doesn't modify data
+    destructiveHint=True,       # Tool may delete/overwrite data
+    idempotentHint=True,        # Repeated calls with same args are safe
+    openWorldHint=True          # Interacts with external entities
+)
+```
+
+#### 3. Parameter Descriptions
+```python
+from pydantic import Field
+
+async def tool(
+    param: str = Field(description="What this parameter does"),
+    ctx: Context
+):
+```
+
+### Idempotency Analysis
+
+**Important**: Idempotency means calling with **the same inputs** produces the same result.
+
+**NOT Idempotent** (different inputs each call):
+- **Updates with etag**: `update_note(id=1, title="X", etag="abc")` → etag changes to "def"
+  - Second call: `update_note(id=1, title="X", etag="abc")` → fails (etag mismatch)
+  - Different input (stale etag) → different result (error)
+- **Creates**: `create_note(title="X")` → creates note 1
+  - Second call → creates note 2 (different result)
+- **Append operations**: `append_content(id=1, text="X")` → adds X once
+  - Second call → adds X again (different result)
+
+**Idempotent**:
+- **Deletes**: `delete_note(id=1)` → note deleted
+  - Second call → 404 or success (same end state: note doesn't exist)
+  - Note: May return different status code, but end state is identical
+- **Full resource PUT without version control**: `write_file(path="/test.txt", content="Hello")` → file has "Hello"
+  - Second call → file still has "Hello" (same end state)
+  - Example: `nc_webdav_write_file` uses HTTP PUT without etags/version control
+- **Set operations**: `set_property(id=1, value="X")` → property = X
+  - Second call → property still = X (same result)
+  - Note: Nextcloud updates with etags use version control, so not idempotent
+
+**Read-Only** (always idempotent, never destructive):
+- All list, search, get operations
+
+## Decision
+
+Add annotations to all 101 tools in three phases:
+
+### Phase 1: Titles (Quick Win)
+Add human-readable titles to all tools:
+
+```python
+@mcp.tool(title="Create Note")
+async def nc_notes_create_note(...):
+```
+
+**Effort**: 2-3 hours
+**Impact**: Immediate UX improvement
+
+### Phase 2: ToolAnnotations (Behavioral Hints)
+Add annotations based on corrected categorization:
+
+```python
+# Read-only tools
+@mcp.tool(
+    title="Search Notes",
+    annotations=ToolAnnotations(
+        readOnlyHint=True,
+        openWorldHint=True  # Nextcloud is external to MCP server
+    )
+)
+
+# Delete tools (idempotent: same end state)
+@mcp.tool(
+    title="Delete Note",
+    annotations=ToolAnnotations(
+        destructiveHint=True,
+        idempotentHint=True,  # Deleting deleted item = same end state
+        openWorldHint=True
+    )
+)
+
+# Create tools (not idempotent: creates multiple items)
+@mcp.tool(
+    title="Create Note",
+    annotations=ToolAnnotations(
+        idempotentHint=False,
+        openWorldHint=True
+    )
+)
+
+# Update tools with etag (not idempotent: etag changes)
+@mcp.tool(
+    title="Update Note",
+    annotations=ToolAnnotations(
+        idempotentHint=False,  # Etag required = different inputs each time
+        openWorldHint=True
+    )
+)
+
+# Append operations (not idempotent: adds content each time)
+@mcp.tool(
+    title="Append to Note",
+    annotations=ToolAnnotations(
+        idempotentHint=False,
+        openWorldHint=True
+    )
+)
+```
+
+**Effort**: 4-6 hours
+**Impact**: Better client behavior (caching, warnings, retry logic)
+
+### Phase 3: Parameter Descriptions
+Add Field() descriptions to parameters:
+
+```python
+from pydantic import Field
+
+@mcp.tool(title="Create Note", annotations=ToolAnnotations(idempotentHint=False))
+async def nc_notes_create_note(
+    title: str = Field(description="The title of the note"),
+    content: str = Field(description="Markdown content of the note"),
+    category: str = Field(description="Category or folder name for organizing"),
+    ctx: Context
+) -> CreateNoteResponse:
+```
+
+**Effort**: 6-8 hours
+**Impact**: Better auto-completion and inline help
+
+## Tool Categorization
+
+### Read-Only Tools (~40 tools)
+**Pattern**: List, search, get operations
+**Annotations**: `readOnlyHint=True`, `openWorldHint=True`
+
+Examples:
+- `nc_notes_search_notes` → "Search Notes"
+- `nc_webdav_list_directory` → "List Files and Directories"
+- `nc_calendar_list_calendars` → "List Calendars"
+- `nc_contacts_get_contact` → "Get Contact"
+- `nc_semantic_search` → "Semantic Search"
+- `check_logged_in` → "Check Server Login Status"
+
+### Create Tools (~20 tools)
+**Pattern**: Create new resources
+**Annotations**: `idempotentHint=False`, `openWorldHint=True`
+
+Examples:
+- `nc_notes_create_note` → "Create Note"
+- `nc_calendar_create_event` → "Create Calendar Event"
+- `nc_contacts_create_contact` → "Create Contact"
+- `deck_create_card` → "Create Kanban Card"
+- `nc_tables_create_row` → "Create Table Row"
+
+### Update Tools (~25 tools)
+**Pattern**: Modify existing resources with etag
+**Annotations**: `idempotentHint=False` (etag changes), `openWorldHint=True`
+
+Examples:
+- `nc_notes_update_note` → "Update Note"
+- `nc_calendar_update_event` → "Update Calendar Event"
+- `nc_contacts_update_contact` → "Update Contact"
+- `deck_update_card` → "Update Kanban Card"
+
+**Rationale**: Updates require etag, which changes after each update. Same parameters on second call will fail due to stale etag = NOT idempotent.
+
+### Append/Accumulate Tools (~5 tools)
+**Pattern**: Add content without replacing
+**Annotations**: `idempotentHint=False`, `openWorldHint=True`
+
+Examples:
+- `nc_notes_append_content` → "Append to Note"
+
+**Rationale**: Each call adds content, changing the result = NOT idempotent.
+
+### Delete Tools (~10 tools)
+**Pattern**: Remove resources
+**Annotations**: `destructiveHint=True`, `idempotentHint=True`, `openWorldHint=True`
+
+Examples:
+- `nc_notes_delete_note` → "Delete Note"
+- `nc_webdav_delete_resource` → "Delete File or Directory"
+- `nc_calendar_delete_event` → "Delete Calendar Event"
+- `nc_contacts_delete_contact` → "Delete Contact"
+
+**Rationale**: Deleting already-deleted item results in same end state (item doesn't exist) = idempotent. Status code may differ, but outcome is identical.
+
+### Special Cases
+
+#### OAuth Provisioning Tools
+```python
+# Not read-only but requires user interaction
+@mcp.tool(
+    title="Grant Server Access to Nextcloud",
+    annotations=ToolAnnotations(
+        readOnlyHint=False,
+        idempotentHint=False,  # Creates new OAuth session each time
+        openWorldHint=True
+    )
+)
+async def provision_nextcloud_access(ctx: Context):
+```
+
+#### Semantic Search (Closed World)
+```python
+@mcp.tool(
+    title="Semantic Search",
+    annotations=ToolAnnotations(
+        readOnlyHint=True,
+        openWorldHint=False  # Searches only indexed Nextcloud data
+    )
+)
+async def nc_semantic_search(query: str, ctx: Context):
+```
+
+**Rationale**: Semantic search only queries pre-indexed Nextcloud content, not the "open world" like web search would.
+
+## Tool Priority Matrix
+
+### Critical Priority (~2 tools)
+OAuth tools required for server functionality:
+- `provision_nextcloud_access` → "Grant Server Access to Nextcloud"
+- `check_logged_in` → "Check Server Login Status"
+
+### High Priority (~50 tools)
+Most commonly used modules:
+- **Notes** (14 tools): Create, read, update, delete notes
+- **WebDAV** (13 tools): File operations
+- **Calendar** (15 tools): Events and todos
+- **Semantic Search** (6 tools): AI-powered search
+- **Contacts** (9 tools): Address book operations
+
+### Medium Priority (~35 tools)
+Secondary functionality:
+- **Deck** (9 tools): Kanban boards
+- **Tables** (7 tools): Structured data
+- **Sharing** (5 tools): File sharing
+
+### Low Priority (~14 tools)
+Less frequently used:
+- **Cookbook** (8 tools): Recipe management
+- **News** (6 tools): RSS feeds
+
+## Implementation Plan
+
+### Week 1: Phase 1 - Titles
+- Add human-readable titles to all 101 tools
+- Update tool name mapping in documentation
+- Manual test in MCP inspector
+
+### Week 2: Phase 2 - ToolAnnotations (High Priority)
+- Add annotations to Critical and High priority tools (~52 tools)
+- Focus on Notes, WebDAV, Calendar, Semantic, OAuth
+- Add unit tests validating annotation presence
+
+### Week 3: Phase 2 - ToolAnnotations (Medium/Low Priority)
+- Complete remaining tools (~49 tools)
+- Deck, Tables, Contacts, Cookbook, News
+- Update tool listings in README
+
+### Week 4: Phase 3 - Parameter Descriptions
+- Add Field() descriptions to Critical/High priority tools
+- Start with OAuth, Notes, WebDAV modules
+- Incremental completion over time
+
+## Benefits
+
+### For Users
+- **Clearer UI**: "Create Note" vs "nc_notes_create_note"
+- **Safety**: Warnings before destructive operations
+- **Better help**: Parameter descriptions in auto-completion
+- **Confidence**: Know which operations are safe to retry
+
+### For MCP Clients
+- **Caching**: Cache results from read-only tools
+- **Safety prompts**: Warn before destructiveHint=true
+- **Retry logic**: Safely retry idempotent operations
+- **UI organization**: Group by behavior (reads vs writes vs deletes)
+- **Performance**: Optimize based on hints
+
+### For Developers
+- **Self-documenting**: Behavior is explicit
+- **Consistency**: Standard patterns across codebase
+- **Testing**: Validate annotations match implementation
+- **Maintenance**: Clear expectations for new tools
+
+## Consequences
+
+### Positive
+- Immediate UX improvement with minimal effort
+- Clients can make smarter decisions
+- Self-documenting code
+- Follows MCP best practices
+
+### Negative
+- Initial effort to add annotations (12-15 hours total)
+- Must maintain annotations when adding new tools
+- Risk of incorrect annotations misleading clients
+
+### Neutral
+- Annotations are hints, not guarantees
+- Clients may ignore annotations
+- Backward compatible (additive change)
+
+### Mitigations
+- **Incorrect annotations**: Add tests validating behavior matches hints
+- **Maintenance burden**: Add to code review checklist and tool template
+- **Documentation**: Update CLAUDE.md with annotation guidelines
+
+## Examples
+
+### Complete Annotated Tool (Delete)
+
+```python
+from mcp.types import ToolAnnotations
+from pydantic import Field
+
+@mcp.tool(
+    title="Delete Note",
+    annotations=ToolAnnotations(
+        destructiveHint=True,   # Deletes data permanently
+        idempotentHint=True,    # Same end state (note doesn't exist)
+        openWorldHint=True      # Nextcloud is external
+    )
+)
+@require_scopes("notes:write")
+@instrument_tool
+async def nc_notes_delete_note(
+    note_id: int = Field(description="The ID of the note to delete permanently"),
+    ctx: Context
+) -> DeleteNoteResponse:
+    """Delete a note permanently (requires notes:write scope)"""
+    client = await get_client(ctx)
+    # ... implementation ...
+```
+
+### Complete Annotated Tool (Update)
+
+```python
+@mcp.tool(
+    title="Update Note",
+    annotations=ToolAnnotations(
+        idempotentHint=False,   # NOT idempotent: etag changes each update
+        openWorldHint=True
+    )
+)
+@require_scopes("notes:write")
+@instrument_tool
+async def nc_notes_update_note(
+    note_id: int = Field(description="The ID of the note to update"),
+    title: str | None = Field(
+        default=None,
+        description="New title (omit to keep current)"
+    ),
+    content: str | None = Field(
+        default=None,
+        description="New markdown content (omit to keep current)"
+    ),
+    category: str | None = Field(
+        default=None,
+        description="New category/folder (omit to keep current)"
+    ),
+    etag: str = Field(
+        description="ETag from get_note (prevents concurrent modification)"
+    ),
+    ctx: Context
+) -> UpdateNoteResponse:
+    """Update an existing note's title, content, or category.
+
+    The etag parameter is required to prevent overwriting concurrent changes.
+    Get the current ETag by first calling nc_notes_get_note.
+    If the note has been modified since you retrieved it, the update will fail.
+    """
+    client = await get_client(ctx)
+    # ... implementation ...
+```
+
+### Complete Annotated Tool (Read-Only)
+
+```python
+@mcp.tool(
+    title="Search Notes",
+    annotations=ToolAnnotations(
+        readOnlyHint=True,    # Doesn't modify data
+        openWorldHint=True    # Queries Nextcloud
+    )
+)
+@require_scopes("notes:read")
+@instrument_tool
+async def nc_notes_search_notes(
+    query: str = Field(description="Search term to match in note titles or content"),
+    ctx: Context
+) -> SearchNotesResponse:
+    """Search notes by title or content, returning id, title, and category.
+
+    This is a read-only operation that searches across all user notes.
+    Use nc_notes_get_note to retrieve the full content of matching notes.
+    """
+    client = await get_client(ctx)
+    # ... implementation ...
+```
+
+## Testing Strategy
+
+### Unit Tests
+Add tests validating annotation presence and correctness:
+
+```python
+def test_notes_tools_have_annotations():
+    """Verify all notes tools have appropriate annotations."""
+    tools = get_registered_tools(mcp)
+
+    # Check create tool
+    create_tool = tools["nc_notes_create_note"]
+    assert create_tool.title == "Create Note"
+    assert create_tool.annotations.idempotentHint is False
+
+    # Check delete tool
+    delete_tool = tools["nc_notes_delete_note"]
+    assert delete_tool.title == "Delete Note"
+    assert delete_tool.annotations.destructiveHint is True
+    assert delete_tool.annotations.idempotentHint is True
+
+    # Check read-only tool
+    search_tool = tools["nc_notes_search_notes"]
+    assert search_tool.title == "Search Notes"
+    assert search_tool.annotations.readOnlyHint is True
+```
+
+### Integration Tests
+- Verify existing tests pass with annotations
+- Manual testing in MCP inspector/client
+
+### Documentation Updates
+- Update README tool listings with new titles
+- Add annotation guidelines to CLAUDE.md
+- Include examples in developer documentation
+
+## Resolved Questions
+
+1. **WebDAV write_file idempotency** (Resolved: 2025-12-11)
+   - **Decision**: Mark as `idempotentHint=True`
+   - **Rationale**: Uses HTTP PUT without version control. Writing same content to same path repeatedly produces identical end state, which is the definition of idempotency in HTTP semantics.
+
+2. **Semantic search openWorldHint** (Resolved: 2025-12-11)
+   - **Decision**: Mark as `openWorldHint=True`
+   - **Rationale**: For consistency with other Nextcloud tools. While the data being searched is "indexed/internal", Nextcloud itself is external to the MCP server. The fact that data is indexed is an implementation detail, not a fundamental difference from other Nextcloud queries.
+
+3. **Read-only with side effects**: Should tools that log analytics still be readOnlyHint=true?
+   - **Decision**: Yes. Logging/analytics are non-visible side effects that don't change user-observable state. Read-only refers to data modifications that affect the user's content.
+
+## Future Considerations
+
+1. **Icons**: Visual icons for tools (requires design work, deferred to future ADR)
+2. **Parameter descriptions**: Add Pydantic `Field(description=...)` for better auto-completion (Phase 3, future work)
+
+## References
+
+- MCP Python SDK: `/home/chris/Software/python-sdk/`
+- ToolAnnotations spec: `src/mcp/types.py:1247`
+- FastMCP decorator: `src/mcp/server/fastmcp/server.py:444`
+- Examples: `examples/fastmcp/parameter_descriptions.py`, `examples/fastmcp/icons_demo.py`
+
+## Decision Timeline
+
+- **Proposed**: 2025-12-11
+- **Reviewed**: 2025-12-11 (Self-review during implementation)
+- **Accepted**: 2025-12-11
+- **Implemented**: 2025-12-11 (Phase 1 & 2 complete)
@@ -0,0 +1,342 @@
+# ADR-020: Deployment Modes and Configuration Validation
+
+**Status:** Accepted
+**Date:** 2025-12-20
+**Deciders:** Development Team
+**Related:** ADR-002 (Vector Sync), ADR-004 (Progressive Consent), ADR-019 (Multi-user BasicAuth)
+
+## Context
+
+The MCP server supports multiple deployment scenarios with different authentication methods, storage backends, and feature sets. Over time, the configuration system evolved to support ~500+ possible combinations across deployment modes, authentication patterns, and feature toggles. This complexity made it difficult to:
+
+1. Understand what configuration is required for a given deployment
+2. Debug configuration errors (validation scattered across multiple files)
+3. Provide helpful error messages when configuration is invalid
+4. Maintain clear boundaries between deployment modes
+
+**Problems Identified:**
+- No single source of truth for "what config is required for mode X"
+- Validation happening at 4+ different points (Settings.__post_init__, setup_oauth_config(), context helpers, starlette_lifespan)
+- Startup sequence unclear (OAuth setup before FastMCP creation, sync initialization errors)
+- Error messages generic ("X is required") without explaining which deployment mode triggered the requirement
+- Multiple overlapping decision trees (deployment mode, auth mode, features)
+
+## Decision
+
+We formalize five distinct deployment modes with explicit configuration requirements and implement centralized configuration validation.
+
+### Deployment Modes
+
+#### 1. Single-User BasicAuth
+
+**Use Case:** Personal Nextcloud instance, local development
+
+**Required Configuration:**
+```bash
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password  # Or app password
+```
+
+**Optional Configuration:**
+```bash
+# Vector sync (semantic search)
+VECTOR_SYNC_ENABLED=true
+QDRANT_LOCATION=/path/to/qdrant  # Or QDRANT_URL for remote
+
+# Embeddings (optional - Simple provider used as fallback)
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Document processing
+DOCUMENT_CHUNK_SIZE=512
+DOCUMENT_CHUNK_OVERLAP=50
+```
+
+**Characteristics:**
+- Single shared NextcloudClient created at startup
+- No OAuth infrastructure needed
+- No multi-user support
+- Vector sync runs as single-user background task
+- Admin UI available at /app
+
+---
+
+#### 2. Multi-User BasicAuth Pass-Through
+
+**Use Case:** Internal deployment where users provide their own credentials, no background sync needed
+
+**Required Configuration:**
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+ENABLE_MULTI_USER_BASIC_AUTH=true
+```
+
+**Optional Configuration:**
+```bash
+# For background sync (requires app passwords from Astrolabe)
+ENABLE_OFFLINE_ACCESS=true
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
+NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
+VECTOR_SYNC_ENABLED=true
+# ... plus Qdrant and embedding config
+```
+
+**Conditional Requirements:**
+- If `ENABLE_OFFLINE_ACCESS=true`: requires `NEXTCLOUD_OIDC_CLIENT_ID`, `NEXTCLOUD_OIDC_CLIENT_SECRET`, `TOKEN_ENCRYPTION_KEY`, `TOKEN_STORAGE_DB`
+- If `VECTOR_SYNC_ENABLED=true`: requires `ENABLE_OFFLINE_ACCESS=true`
+
+**Characteristics:**
+- No OAuth for client authentication (uses BasicAuth in request headers)
+- BasicAuthMiddleware extracts credentials from Authorization header
+- Client created per-request from extracted credentials
+- Optional: Background sync using app passwords (via Astrolabe API)
+- Admin UI available at /app
+
+---
+
+#### 3. OAuth Single-Audience (Default)
+
+**Use Case:** Multi-user deployment with OAuth authentication, tokens work for both MCP and Nextcloud
+
+**Required Configuration:**
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+# No NEXTCLOUD_USERNAME/PASSWORD (triggers OAuth mode)
+```
+
+**Auto-Configured:**
+- OIDC discovery URL: `{NEXTCLOUD_HOST}/.well-known/openid-configuration`
+- Client credentials: Dynamic Client Registration (DCR) if available
+- Token storage: SQLite at `~/.oauth/clients.db`
+
+**Optional Configuration:**
+```bash
+# Static client credentials (instead of DCR)
+NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
+NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
+
+# Offline access for background sync
+ENABLE_OFFLINE_ACCESS=true
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+VECTOR_SYNC_ENABLED=true
+# ... plus Qdrant and embedding config
+
+# Scopes
+NEXTCLOUD_OIDC_SCOPES="openid profile email notes:read notes:write ..."
+```
+
+**Conditional Requirements:**
+- If `ENABLE_OFFLINE_ACCESS=true`: requires `TOKEN_ENCRYPTION_KEY`, `TOKEN_STORAGE_DB`
+- If `VECTOR_SYNC_ENABLED=true`: requires `ENABLE_OFFLINE_ACCESS=true`
+
+**Characteristics:**
+- Tokens contain both `aud: ["mcp-server", "nextcloud"]`
+- Pass token through to Nextcloud APIs (no exchange)
+- Client created per-request from token in Authorization header
+- Background sync uses refresh tokens (if offline_access enabled)
+- Admin UI available at /app
+
+---
+
+#### 4. OAuth Token Exchange (RFC 8693)
+
+**Use Case:** Multi-user deployment where MCP token is separate from Nextcloud token
+
+**Required Configuration:**
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+ENABLE_TOKEN_EXCHANGE=true
+# No NEXTCLOUD_USERNAME/PASSWORD (triggers OAuth mode)
+```
+
+**Optional Configuration:**
+- Same as OAuth Single-Audience, plus:
+```bash
+TOKEN_EXCHANGE_CACHE_TTL=300  # Cache exchanged tokens
+```
+
+**Characteristics:**
+- Tokens contain only `aud: "mcp-server"`
+- MCP server exchanges token for Nextcloud token via RFC 8693
+- Exchanged tokens cached per-user
+- Client created per-request using exchanged token
+- Background sync uses refresh tokens (if offline_access enabled)
+
+---
+
+#### 5. Smithery Stateless
+
+**Use Case:** Multi-tenant SaaS deployment via Smithery platform
+
+**Required Configuration:**
+- None! Configuration comes from session URL params: `?nextcloud_url=...&username=...&app_password=...`
+
+**Forbidden Configuration:**
+- Must NOT set: `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`, `ENABLE_MULTI_USER_BASIC_AUTH`, `ENABLE_TOKEN_EXCHANGE`, `ENABLE_OFFLINE_ACCESS`, `VECTOR_SYNC_ENABLED`, `NEXTCLOUD_OIDC_CLIENT_ID`, `NEXTCLOUD_OIDC_CLIENT_SECRET`
+
+**Characteristics:**
+- No persistent storage (stateless)
+- Client created per-request from session config
+- No vector sync (disabled)
+- No admin UI (no /app routes)
+- No OAuth infrastructure
+
+---
+
+### Configuration Validation
+
+**Implementation:** `nextcloud_mcp_server/config_validators.py`
+
+**Key Functions:**
+```python
+def detect_auth_mode(settings: Settings) -> AuthMode:
+    """Detect authentication mode from configuration.
+
+    Priority (most specific to most general):
+    1. Smithery (explicit flag)
+    2. Token exchange (most specific OAuth mode)
+    3. Multi-user BasicAuth
+    4. Single-user BasicAuth
+    5. OAuth single-audience (default OAuth mode)
+    """
+
+def validate_configuration(settings: Settings) -> tuple[AuthMode, list[str]]:
+    """Validate configuration for detected mode.
+
+    Returns:
+        Tuple of (detected_mode, list_of_errors)
+        Empty list means valid configuration.
+    """
+```
+
+**Validation Rules:**
+- **Required variables:** Must be set and non-empty
+- **Forbidden variables:** Must NOT be set (or must be False for booleans)
+- **Conditional requirements:** If feature X is enabled, requires variables Y and Z
+
+**Error Messages:**
+```
+Configuration validation failed for {mode} mode:
+  - [{mode}] Missing required configuration: NEXTCLOUD_HOST
+  - [{mode}] ENABLE_OFFLINE_ACCESS must be enabled when VECTOR_SYNC_ENABLED is true
+
+Mode: {mode}
+Description: {mode_description}
+
+Required configuration:
+  - VAR1
+  - VAR2
+
+Optional configuration:
+  - VAR3
+  - VAR4
+
+Conditional requirements:
+  When FEATURE is enabled:
+    - VAR5
+    - VAR6
+```
+
+**Integration:**
+- Validation runs at app startup in `get_app()` (app.py:1048-1062)
+- All errors reported before any initialization begins
+- Mode-specific error messages explain requirements
+- Validation uses the same Settings object used throughout the app
+
+### Configuration Matrix
+
+| Variable | Single BasicAuth | Multi BasicAuth | OAuth Single | OAuth Exchange | Smithery |
+|----------|------------------|-----------------|--------------|----------------|----------|
+| **NEXTCLOUD_HOST** | Required | Required | Required | Required | Forbidden |
+| **NEXTCLOUD_USERNAME** | Required | Forbidden | Forbidden | Forbidden | Forbidden |
+| **NEXTCLOUD_PASSWORD** | Required | Forbidden | Forbidden | Forbidden | Forbidden |
+| **ENABLE_MULTI_USER_BASIC_AUTH** | Forbidden | Required | Forbidden | Forbidden | Forbidden |
+| **ENABLE_TOKEN_EXCHANGE** | Forbidden | Forbidden | Forbidden | Required | Forbidden |
+| **ENABLE_OFFLINE_ACCESS** | Optional\* | Optional\* | Optional\* | Optional\* | Forbidden |
+| **TOKEN_ENCRYPTION_KEY** | If offline | If offline | If offline | If offline | Forbidden |
+| **TOKEN_STORAGE_DB** | If offline | If offline | If offline | If offline | Forbidden |
+| **OIDC_CLIENT_ID** | Forbidden | If offline | Optional\*\* | Optional\*\* | Forbidden |
+| **OIDC_CLIENT_SECRET** | Forbidden | If offline | Optional\*\* | Optional\*\* | Forbidden |
+| **VECTOR_SYNC_ENABLED** | Optional | Optional | Optional | Optional | Forbidden |
+| **QDRANT_URL/LOCATION** | If vector | If vector | If vector | If vector | Forbidden |
+| **OLLAMA_BASE_URL/OPENAI_API_KEY** | Optional | Optional | Optional | Optional | Forbidden |
+
+\* Only enables background sync for semantic search
+\*\* Uses DCR if not provided
+
+## Consequences
+
+### Positive
+
+1. **Clarity:** Single function to detect mode from config
+2. **Validation:** All config validated upfront with helpful errors
+3. **Debugging:** Clear logs showing "Running in X mode with config Y"
+4. **Maintenance:** Mode-specific logic can be isolated
+5. **Documentation:** Clear mapping of mode → required config
+6. **Error Messages:** Context-aware ("X is required for Y mode")
+7. **Testing:** Each mode testable in isolation
+
+### Negative
+
+1. **Migration:** Existing invalid configurations will now fail at startup
+2. **Flexibility:** Less flexibility in configuration combinations
+3. **Strictness:** Some previously-working combinations may be rejected
+
+### Neutral
+
+1. **Backward Compatibility:** Valid configurations continue to work
+2. **Mode Detection:** Automatic based on config (no explicit mode selection)
+3. **Default Mode:** OAuth single-audience when no credentials provided
+
+## Implementation Notes
+
+### Embedding Provider Validation
+
+Originally, validation required either `OLLAMA_BASE_URL` or `OPENAI_API_KEY` when vector sync was enabled. This was too strict because the Simple provider is always available as a fallback (ADR-015). The validation was removed to allow vector sync without explicit provider configuration.
+
+### Variable Scoping Issues
+
+During implementation, several Python variable scoping issues were discovered in `app.py`:
+- Local variable assignments in `starlette_lifespan()` shadowed outer scope variables
+- Fixed by using unique variable names (e.g., `nextcloud_host_for_context`, `basic_auth_storage`)
+- Removed redundant `settings = get_settings()` call (re-used outer scope)
+
+### Docker Compose Configuration
+
+The `mcp-oauth` service configuration was updated to remove `ENABLE_MULTI_USER_BASIC_AUTH=true` which conflicted with its intended OAuth mode. The service now runs in OAuth single-audience mode with vector sync using the Simple embedding provider as fallback.
+
+## Testing
+
+### Unit Tests
+
+`tests/unit/test_config_validators.py` provides comprehensive coverage:
+- Mode detection with priority ordering (7 tests)
+- Single-user BasicAuth validation (8 tests)
+- Multi-user BasicAuth validation (7 tests)
+- OAuth single-audience validation (6 tests)
+- OAuth token exchange validation (3 tests)
+- Smithery validation (4 tests)
+- Mode summary generation (3 tests)
+- Edge cases (3 tests)
+
+**Total: 41 tests, all passing**
+
+### Integration Tests
+
+Integration tests verify that:
+- Each mode starts successfully with valid configuration
+- Invalid configurations fail with clear error messages
+- Existing deployments continue to work
+
+## References
+
+- [ADR-002: Vector Sync Authentication](ADR-002-vector-sync-authentication.md)
+- [ADR-004: Progressive Consent](ADR-004-progressive-consent.md)
+- [ADR-015: Unified Provider Architecture](ADR-015-unified-provider-architecture.md)
+- [ADR-019: Multi-user BasicAuth Pass-Through](ADR-019-multi-user-basicauth-passthrough.md)
+- Implementation: `nextcloud_mcp_server/config_validators.py`
+- Tests: `tests/unit/test_config_validators.py`
@@ -0,0 +1,391 @@
+# ADR-021: Configuration Consolidation and Simplification
+
+**Status:** Accepted
+**Date:** 2025-12-21
+**Deciders:** Development Team
+**Related:** ADR-020 (Deployment Modes), ADR-002 (Vector Sync), ADR-004 (Progressive Consent)
+
+## Context
+
+The configuration system has grown complex with overlapping concerns that make it difficult for users to switch between deployment modes and understand configuration dependencies.
+
+### Problems Identified
+
+1. **Confusing variable names don't reflect purpose**:
+   - `ENABLE_OFFLINE_ACCESS` - Actually controls refresh token storage for background operations, not general "offline" capabilities
+   - `VECTOR_SYNC_ENABLED` - Controls semantic search background indexing (implementation detail, not user-facing feature name)
+   - Users struggle to understand what these variables actually control
+
+2. **Redundant configuration requirements**:
+   - Multi-user semantic search requires setting BOTH `ENABLE_OFFLINE_ACCESS=true` AND `VECTOR_SYNC_ENABLED=true`
+   - The dependency is one-way (semantic search needs background ops, but background ops don't need semantic search)
+   - Users must understand internal implementation details to configure a user-facing feature
+
+3. **Implicit mode detection creates ambiguity**:
+   - Five deployment modes detected via priority-based logic
+   - Users can't easily predict which mode will activate
+   - Configuration errors don't clearly indicate which mode triggered the requirement
+
+4. **OIDC_CLIENT_ID vs NEXTCLOUD_OIDC_CLIENT_ID confusion**:
+   - Investigation revealed these are NOT actually overlapping (`OIDC_CLIENT_ID` is test-only)
+   - However, their similar names create confusion
+
+### Current Configuration Complexity
+
+**Example: Multi-user OAuth with semantic search**:
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_OFFLINE_ACCESS=true      # Why is this needed?
+VECTOR_SYNC_ENABLED=true        # And this separately?
+QDRANT_URL=http://qdrant:6333
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+```
+
+Users must understand:
+- Semantic search requires background token storage (ENABLE_OFFLINE_ACCESS)
+- Background token storage requires encryption keys
+- The relationship between ENABLE_OFFLINE_ACCESS and VECTOR_SYNC_ENABLED
+- Which deployment mode these settings will activate
+
+## Decision
+
+We consolidate overlapping functionality and add explicit mode selection while maintaining 100% backward compatibility.
+
+### 1. Automatic Dependency Resolution
+
+**Make ENABLE_SEMANTIC_SEARCH the primary control** that automatically enables required dependencies:
+
+**New behavior**:
+```python
+@property
+def enable_background_operations(self) -> bool:
+    """Background operations - auto-enabled by semantic search in multi-user modes."""
+    # Check new names first
+    explicit = os.getenv("ENABLE_BACKGROUND_OPERATIONS", "").lower() == "true"
+    # Fall back to old name with deprecation warning
+    legacy = os.getenv("ENABLE_OFFLINE_ACCESS", "").lower() == "true"
+    # Auto-enable if semantic search needs it
+    auto_enabled = self.enable_semantic_search and self.is_multi_user_mode()
+
+    return explicit or legacy or auto_enabled
+
+@property
+def enable_semantic_search(self) -> bool:
+    """Semantic search - renamed from VECTOR_SYNC_ENABLED."""
+    new_value = os.getenv("ENABLE_SEMANTIC_SEARCH", "").lower() == "true"
+    old_value = os.getenv("VECTOR_SYNC_ENABLED", "").lower() == "true"
+    return new_value or old_value
+```
+
+**Result**: Users set `ENABLE_SEMANTIC_SEARCH=true` and the system automatically enables background token storage when needed.
+
+### 2. Explicit Mode Selection (Optional)
+
+Add `MCP_DEPLOYMENT_MODE` environment variable to remove detection ambiguity:
+
+```bash
+# Optional: Explicitly declare deployment mode
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
+# Valid values: single_user_basic, multi_user_basic,
+#               oauth_single_audience, oauth_token_exchange, smithery
+```
+
+**Detection logic**:
+1. If `MCP_DEPLOYMENT_MODE` is set → validate and use it
+2. Otherwise → use priority-based auto-detection (existing behavior)
+3. Validate explicit mode doesn't conflict with detected mode
+
+### 3. Simplified User Experience
+
+**Before**:
+```bash
+# Multi-user OAuth with semantic search
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_OFFLINE_ACCESS=true      # Confusing
+VECTOR_SYNC_ENABLED=true        # Why both?
+QDRANT_URL=http://qdrant:6333
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+```
+
+**After**:
+```bash
+# Multi-user OAuth with semantic search
+NEXTCLOUD_HOST=https://nextcloud.example.com
+MCP_DEPLOYMENT_MODE=oauth_single_audience  # Explicit (optional)
+ENABLE_SEMANTIC_SEARCH=true                # Auto-enables background ops
+QDRANT_URL=http://qdrant:6333
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+```
+
+**Benefits**:
+- 2 fewer variables to understand/set
+- Clear intent ("I want semantic search")
+- Explicit mode declaration (optional)
+- All existing configs continue working
+
+### 4. Variable Naming Strategy
+
+**Deprecated (but still functional)**:
+- `ENABLE_OFFLINE_ACCESS` → Renamed to `ENABLE_BACKGROUND_OPERATIONS`
+- `VECTOR_SYNC_ENABLED` → Renamed to `ENABLE_SEMANTIC_SEARCH`
+
+**No change needed**:
+- `VECTOR_SYNC_SCAN_INTERVAL` - Implementation tuning parameter (keep as-is)
+- `VECTOR_SYNC_PROCESSOR_WORKERS` - Implementation tuning parameter (keep as-is)
+- `VECTOR_SYNC_QUEUE_MAX_SIZE` - Implementation tuning parameter (keep as-is)
+
+**Rationale**: Only rename user-facing feature flags, not internal tuning parameters.
+
+### 5. Backward Compatibility
+
+**Support both old and new names for minimum 2 major versions**:
+
+```python
+@property
+def enable_semantic_search(self) -> bool:
+    new_value = os.getenv("ENABLE_SEMANTIC_SEARCH", "").lower() == "true"
+    old_value = os.getenv("VECTOR_SYNC_ENABLED", "").lower() == "true"
+
+    if new_value and old_value:
+        logger.warning(
+            "Both ENABLE_SEMANTIC_SEARCH and VECTOR_SYNC_ENABLED are set. "
+            "Using ENABLE_SEMANTIC_SEARCH. VECTOR_SYNC_ENABLED is deprecated."
+        )
+
+    if old_value and not new_value:
+        logger.warning(
+            "VECTOR_SYNC_ENABLED is deprecated. Please use ENABLE_SEMANTIC_SEARCH instead."
+        )
+
+    return new_value or old_value
+```
+
+**Deprecation timeline**:
+- v0.6.0: Add new variables, deprecate old ones (both work with warnings)
+- v1.0.0: Remove old variables (breaking change, well-announced)
+- Minimum 2 major versions of support (12+ months)
+
+## Consequences
+
+### Positive
+
+1. **Reduced cognitive load**: Users set `ENABLE_SEMANTIC_SEARCH=true` instead of understanding internal dependencies
+2. **Clearer intent**: Variable names reflect user-facing features, not implementation details
+3. **Explicit mode control**: `MCP_DEPLOYMENT_MODE` removes detection ambiguity
+4. **Better onboarding**: New users see simpler configuration in env.sample
+5. **Improved error messages**: Validation can suggest "set MCP_DEPLOYMENT_MODE=X" instead of relying on implicit detection
+6. **No breaking changes**: All existing configurations continue working
+
+### Negative
+
+1. **Transition period complexity**: Both old and new names supported for 2+ versions
+2. **Documentation burden**: All docs must be updated to show new approach
+3. **Test coverage expansion**: Must test both old and new variable names in all modes
+4. **Migration effort**: Existing deployments should eventually migrate (optional but recommended)
+
+### Neutral
+
+1. **Same functionality**: No new features, just better organization
+2. **Same validation**: Underlying requirements unchanged (e.g., semantic search still needs Qdrant)
+3. **Same performance**: No runtime performance impact
+
+## Implementation
+
+### Phase 1: Configuration Consolidation (v0.6.0)
+
+**Files to modify**:
+- `nextcloud_mcp_server/config.py` - Add property-based deprecation with auto-enablement
+- `nextcloud_mcp_server/config_validators.py` - Simplify validation (semantic search no longer requires explicit background operations setting)
+- `nextcloud_mcp_server/app.py` - Add informative logging for auto-enablement
+- `tests/unit/test_config_validators.py` - Add auto-enablement tests
+- `docs/configuration-migration-v2.md` - Create migration guide
+
+**Key changes**:
+1. `enable_background_operations` property auto-enables when `enable_semantic_search=true` in multi-user modes
+2. `enable_semantic_search` property accepts both `ENABLE_SEMANTIC_SEARCH` and `VECTOR_SYNC_ENABLED`
+3. Smart logging when auto-enablement occurs or deprecated variables used
+4. Validation simplified to remove redundant requirements
+
+### Phase 2: Explicit Mode Selection (v0.6.0)
+
+**Files to modify**:
+- `nextcloud_mcp_server/config.py` - Add `deployment_mode` field
+- `nextcloud_mcp_server/config_validators.py` - Check explicit mode first, fall back to auto-detection
+- `tests/unit/test_config_validators.py` - Test mode override and conflict detection
+- `docs/configuration.md` - Document mode selection
+
+**Key changes**:
+1. Add `MCP_DEPLOYMENT_MODE` environment variable (optional)
+2. Mode detection checks explicit mode first, then auto-detects
+3. Validate explicit mode doesn't conflict with detected mode
+4. Better error messages referencing explicit mode setting
+
+### Phase 3: env.sample Reorganization (v0.6.0)
+
+**Files to create/modify**:
+- `env.sample` - Reorganize by deployment mode
+- `env.sample.single-user` - Simplest config template
+- `env.sample.oauth-multi-user` - Multi-user template showing consolidation
+- `env.sample.oauth-advanced` - Token exchange mode template
+- `README.md` - Update Quick Start to reference templates
+
+**Key changes**:
+1. Group related settings by deployment mode
+2. Show simplified configuration (only essential variables)
+3. Document automatic dependencies inline
+4. Provide mode-specific quick-start templates
+
+### Phase 4: Documentation Updates (v0.7.0)
+
+**Files to modify**:
+- `docs/configuration.md` - Lead with consolidated approach
+- `docs/authentication.md` - Update mode guidance with `MCP_DEPLOYMENT_MODE`
+- `docs/troubleshooting.md` - Add consolidation troubleshooting section
+- `docs/configuration-migration-v2.md` - Expand with comprehensive examples
+- `docs/ADR-020-deployment-modes-and-configuration-validation.md` - Update configuration matrix
+- All other ADRs - Update variable references
+
+**Key changes**:
+1. Update all examples to use new variable names
+2. Add before/after migration examples
+3. Document automatic dependency resolution
+4. Add mode selection decision tree diagram
+
+## Validation Strategy
+
+### Test Coverage Requirements
+
+**Backward compatibility tests**:
+- Old variable names still work (ENABLE_OFFLINE_ACCESS, VECTOR_SYNC_ENABLED)
+- New variable names work (ENABLE_BACKGROUND_OPERATIONS, ENABLE_SEMANTIC_SEARCH)
+- Setting both old and new triggers deprecation warning but works correctly
+- All 41 existing config validation tests pass
+
+**Auto-enablement tests**:
+- `ENABLE_SEMANTIC_SEARCH=true` in OAuth mode → `enable_background_operations=true`
+- `ENABLE_SEMANTIC_SEARCH=true` in single-user mode → `enable_background_operations=false` (not needed)
+- `ENABLE_SEMANTIC_SEARCH=false` → `enable_background_operations=false` (unless explicitly set)
+
+**Mode selection tests**:
+- `MCP_DEPLOYMENT_MODE=oauth_single_audience` → mode correctly detected
+- `MCP_DEPLOYMENT_MODE` conflicts with detected mode → validation error
+- No `MCP_DEPLOYMENT_MODE` → auto-detection works as before
+
+## Success Metrics
+
+**Immediate** (v0.6.0 release):
+- Zero breaking changes in existing deployments
+- All 41 config validation tests pass
+- New users report clearer configuration process
+
+**Medium-term** (6 months after v0.6.0):
+- 80% of new deployments use new variable names
+- Mode selection errors decrease by 50%
+- Support requests about configuration decrease
+
+**Long-term** (12+ months):
+- 90% of deployments migrated to new names
+- Old variable names can be safely removed in v1.0.0
+- Configuration-related issues in issue tracker decrease
+
+## Alternatives Considered
+
+### Alternative 1: Just Rename Variables
+
+**Rejected**: User feedback: "There's no reason to just rename variables without consolidating functionality"
+
+This would make names clearer but wouldn't reduce the number of variables users need to set. The real problem is requiring users to set both ENABLE_OFFLINE_ACCESS and VECTOR_SYNC_ENABLED when they just want semantic search.
+
+### Alternative 2: Remove ENABLE_OFFLINE_ACCESS Entirely
+
+**Rejected**: Advanced users need background operations without semantic search
+
+Some deployments might want background token storage for future features (background Deck sync, background Calendar sync, etc.) without enabling semantic search. Keeping ENABLE_BACKGROUND_OPERATIONS (renamed) allows this.
+
+### Alternative 3: Always Auto-Enable Background Operations
+
+**Rejected**: Single-user mode doesn't need background token storage
+
+Auto-enablement is only needed in multi-user modes. Single-user mode uses a shared client with BasicAuth, so background token storage is unnecessary. Always enabling it would waste resources and create confusing log messages.
+
+### Alternative 4: Require All New Names Immediately
+
+**Rejected**: Breaking change would affect all existing deployments
+
+Forcing migration to new variable names in v0.6.0 would break every existing deployment. Supporting both old and new names with deprecation warnings provides a smooth migration path.
+
+## References
+
+- [ADR-020: Deployment Modes and Configuration Validation](ADR-020-deployment-modes-and-configuration-validation.md)
+- [ADR-002: Vector Sync Authentication](ADR-002-vector-sync-authentication.md)
+- [ADR-004: Progressive Consent](ADR-004-mcp-application-oauth.md)
+- [Issue: Configuration complexity for multi-user semantic search](https://github.com/cbcoutinho/nextcloud-mcp-server/issues/XXX)
+
+## Migration Examples
+
+### Example 1: Single-User BasicAuth with Semantic Search
+
+**Before**:
+```bash
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+VECTOR_SYNC_ENABLED=true
+QDRANT_LOCATION=:memory:
+```
+
+**After** (optional migration):
+```bash
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+ENABLE_SEMANTIC_SEARCH=true  # Renamed
+QDRANT_LOCATION=:memory:
+# Note: Background operations NOT auto-enabled (not needed in single-user mode)
+```
+
+### Example 2: Multi-User OAuth with Semantic Search
+
+**Before**:
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_OFFLINE_ACCESS=true
+VECTOR_SYNC_ENABLED=true
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+QDRANT_URL=http://qdrant:6333
+```
+
+**After** (simplified):
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+MCP_DEPLOYMENT_MODE=oauth_single_audience  # Explicit (optional)
+ENABLE_SEMANTIC_SEARCH=true                # Auto-enables background operations
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+QDRANT_URL=http://qdrant:6333
+# Note: ENABLE_OFFLINE_ACCESS no longer needed (auto-enabled)
+```
+
+### Example 3: Multi-User OAuth WITHOUT Semantic Search
+
+**Before**:
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_OFFLINE_ACCESS=true  # For future background features
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+```
+
+**After** (optional migration):
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+ENABLE_BACKGROUND_OPERATIONS=true  # Renamed for clarity
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/path/to/tokens.db
+```
@@ -0,0 +1,169 @@
+# ADR-023: OAuth Authorization Server Proxy
+
+## Status
+
+Accepted
+
+## Date
+
+2026-03-02
+
+## Context
+
+When the MCP server operates in OAuth mode (e.g., `mcp-login-flow` profile), MCP clients like Claude Code need to authenticate before calling any tools. The server advertises itself as an OAuth Protected Resource via RFC 9728 (Protected Resource Metadata / PRM), which tells clients where to find the Authorization Server.
+
+### The Problem
+
+The original design used a **pass-through** pattern for Flow 1 (client authentication):
+
+1. PRM at `/.well-known/oauth-protected-resource` pointed `authorization_servers` to Nextcloud's public URL
+2. Claude Code performed OIDC discovery on Nextcloud, used DCR to register its own client, and obtained tokens directly from Nextcloud
+3. Tokens issued by Nextcloud had Claude Code's `client_id` as the `aud` (audience) claim
+
+This caused an audience mismatch:
+
+```
+Token rejected: Missing MCP audience.
+Got klehQp8uHCK9fu... (Claude Code's client_id),
+need 8ilzB5ZPWr2Qt4... (MCP server's client_id) or http://localhost:8004
+```
+
+The `_has_mcp_audience()` check in `unified_verifier.py` correctly requires tokens to contain either the MCP server's `client_id` or its URL as the audience — but tokens obtained directly from Nextcloud by a third-party client will never have that audience.
+
+This meant Claude Code could never authenticate → could never call `nc_auth_provision_access` → Login Flow v2 never triggered → the server was unusable.
+
+### Why Not Just Relax Audience Validation?
+
+Audience validation exists for security (RFC 7519 §4.1.3). Removing it would allow any valid Nextcloud token to access the MCP server, including tokens issued for completely different purposes.
+
+## Decision
+
+Make the MCP server act as its own **OAuth Authorization Server proxy** (intermediary pattern). The MCP server advertises itself as the AS, handles client registration and authorization, but proxies the actual authentication to Nextcloud using its own credentials. This ensures all tokens have the correct audience.
+
+### Flow Overview
+
+```
+Client                    MCP Server (AS Proxy)              Nextcloud (IdP)
+  |                              |                                |
+  |-- POST /oauth/register ----->| ---- proxy DCR --------------->|
+  |<---- client_id, etc. --------|<---- client_id, etc. ----------|
+  |                              |                                |
+  |-- GET /oauth/authorize ----->| (store client params)          |
+  |  (client_id, redirect,       | redirect with MCP's client_id  |
+  |   code_challenge, state)     |------- GET /authorize -------->|
+  |                              |  (MCP client_id, MCP callback) |
+  |                              |                                |
+  |                              |    [user authenticates]        |
+  |                              |                                |
+  |                              |<------ code + state -----------|
+  |                              | (exchange code server-side)    |
+  |                              |------- POST /token ----------->|
+  |                              |  (code, MCP client_id+secret)  |
+  |                              |<------ NC token (aud=MCP) -----|
+  |                              |                                |
+  |                              | (generate proxy_code, store    |
+  |                              |  mapping to NC token)          |
+  |<-- redirect to client -------|                                |
+  |    (proxy_code, state)       |                                |
+  |                              |                                |
+  |-- POST /oauth/token -------->| (verify PKCE, lookup code)    |
+  |  (proxy_code, code_verifier) | return stored NC token        |
+  |<---- access_token -----------|                                |
+  |                              |                                |
+  |-- POST /mcp (Bearer token) ->| verify_access_token()         |
+  |  (NC token with aud=MCP ✓)   | _has_mcp_audience() → PASS    |
+```
+
+### Key Design Decisions
+
+#### 1. PKCE Handling — Local Verification
+
+The MCP server receives the client's `code_challenge` but does **not** forward it to Nextcloud. Instead:
+
+- **Nextcloud side**: MCP server authenticates as a confidential client (`client_id` + `client_secret`), so PKCE is not required
+- **Client side**: MCP server verifies PKCE locally when the client exchanges the proxy code at `/oauth/token`
+
+This avoids the impossible situation where the server would need the `code_verifier` to exchange code with Nextcloud but doesn't have it (only the client does).
+
+#### 2. In-Memory Proxy Code Storage
+
+Proxy codes (the authorization codes issued by the AS proxy to clients) use in-memory storage rather than SQLite because:
+
+- They have a 60-second TTL
+- They are single-use (deleted on exchange)
+- They only exist during the brief OAuth flow
+- The MCP server is single-instance
+
+#### 3. PRM Points to MCP Server
+
+The `authorization_servers` field in the PRM response now points to the MCP server URL instead of Nextcloud's public URL. This is what triggers the entire proxy flow — clients discover the MCP server as their AS.
+
+#### 4. DCR Proxy
+
+Client registration requests at `/oauth/register` are proxied to Nextcloud's DCR endpoint. The resulting `client_id` is stored in the local `ClientRegistry` so that `/oauth/authorize` can validate it. The client receives the same DCR response it would get from Nextcloud directly.
+
+## Alternatives Considered
+
+### 1. Relax Audience Validation
+
+Remove `_has_mcp_audience()` check entirely. **Rejected**: Violates RFC 7519 security model.
+
+### 2. Client Pre-Registration
+
+Require clients to register directly with Nextcloud and configure the MCP server with their `client_id`. **Rejected**: Poor UX, doesn't work with DCR-based clients like Claude Code.
+
+### 3. Token Exchange (RFC 8693)
+
+The MCP server could accept any Nextcloud token and exchange it for one with the correct audience. **Rejected**: Nextcloud's OIDC app doesn't support RFC 8693 token exchange. This was already explored in ADR-005.
+
+### 4. Custom Audience Configuration
+
+Add configuration to accept specific external `client_id` values as valid audiences. **Rejected**: Requires manual configuration per client, doesn't scale with DCR.
+
+## New Endpoints
+
+| Endpoint | Method | Purpose |
+|----------|--------|---------|
+| `/.well-known/oauth-authorization-server` | GET | RFC 8414 AS metadata |
+| `/oauth/authorize` | GET | Authorization (modified: intermediary, not pass-through) |
+| `/oauth/token` | POST | Token exchange (proxy codes + refresh token proxy) |
+| `/oauth/register` | POST | DCR proxy to Nextcloud |
+
+## Files Modified
+
+| File | Changes |
+|------|---------|
+| `nextcloud_mcp_server/auth/oauth_routes.py` | New: `oauth_as_metadata`, `oauth_register_proxy`, `oauth_token_endpoint`, `_oauth_callback_as_proxy`. Modified: `oauth_authorize` (intermediary pattern), `oauth_callback` (AS proxy routing) |
+| `nextcloud_mcp_server/app.py` | New routes, PRM `authorization_servers` → MCP server URL, `app.state.supported_scopes` |
+| `nextcloud_mcp_server/auth/client_registry.py` | New: `register_proxy_client()`, wildcard scope support |
+
+## Consequences
+
+### Positive
+
+- Tokens always have the correct audience — `_has_mcp_audience()` passes
+- Works with any MCP client that implements RFC 9728 (PRM) discovery
+- No changes needed to Nextcloud's OIDC configuration
+- DCR still works transparently (clients register via proxy)
+- Existing Flow 2 (resource provisioning) and browser login are unaffected
+
+### Negative
+
+- MCP server is now stateful during the OAuth flow (in-memory proxy codes)
+- Extra network hop for token exchange (MCP server → Nextcloud → back)
+- Token refresh requires proxying through the MCP server
+- Single-instance limitation for proxy code storage (acceptable for current deployment model)
+
+### Risks
+
+- In-memory proxy codes are lost on server restart (mitigated by 60s TTL — user just retries)
+- Discovery endpoint fetch during OAuth flow adds latency (could be cached)
+
+## References
+
+- [RFC 8414 — OAuth 2.0 Authorization Server Metadata](https://tools.ietf.org/html/rfc8414)
+- [RFC 9728 — OAuth 2.0 Protected Resource Metadata](https://tools.ietf.org/html/rfc9728)
+- [RFC 7636 — PKCE](https://tools.ietf.org/html/rfc7636)
+- [RFC 7591 — Dynamic Client Registration](https://tools.ietf.org/html/rfc7591)
+- ADR-004 — MCP Application OAuth (progressive consent architecture)
+- ADR-005 — Token Audience Validation
@@ -0,0 +1,104 @@
+# MCP 1.23.x DNS Rebinding Protection Fix
+
+## Problem
+
+MCP Python SDK 1.23.0 introduced **automatic DNS rebinding protection** that breaks containerized deployments (Kubernetes, Docker) when the protection is unintentionally auto-enabled.
+
+### Root Cause
+
+From `mcp/server/fastmcp/server.py:177-183` in the Python SDK:
+
+```python
+# Auto-enable DNS rebinding protection for localhost (IPv4 and IPv6)
+if transport_security is None and host in ("127.0.0.1", "localhost", "::1"):
+    transport_security = TransportSecuritySettings(
+        enable_dns_rebinding_protection=True,
+        allowed_hosts=["127.0.0.1:*", "localhost:*", "[::1]:*"],
+        allowed_origins=["http://127.0.0.1:*", "http://localhost:*", "http://[::1]:*"],
+    )
+```
+
+### What Was Happening
+
+1. **FastMCP initialization** in `app.py` didn't pass `host` or `transport_security` parameters
+2. **Defaults applied**: `host="127.0.0.1"`, `transport_security=None`
+3. **Auto-enablement triggered**: Condition `transport_security is None and host == "127.0.0.1"` was TRUE
+4. **Protection activated** with `allowed_hosts=["127.0.0.1:*", "localhost:*", "[::1]:*"]`
+5. **Kubernetes requests rejected**: `Host: nextcloud-mcp-server.default.svc.cluster.local:8000` didn't match allowed hosts
+
+### Why `--host 0.0.0.0` Didn't Help
+
+The `--host` CLI flag (used in Dockerfile/docker-compose) controls **uvicorn's bind address**, NOT the **FastMCP `host` parameter**. These are separate concerns:
+
+- **Uvicorn bind address** (`--host 0.0.0.0`): Where the HTTP server listens
+- **FastMCP host parameter** (defaulted to `"127.0.0.1"`): Used for auto-enablement logic
+
+## Solution
+
+Explicitly disable DNS rebinding protection by passing `transport_security=TransportSecuritySettings(enable_dns_rebinding_protection=False)` to all FastMCP instances.
+
+### Changes Made
+
+Modified `nextcloud_mcp_server/app.py`:
+
+1. **Import** `TransportSecuritySettings` from `mcp.server.transport_security`
+2. **Updated all three FastMCP initializations**:
+   - OAuth mode (line 1015)
+   - Smithery stateless mode (line 1030)
+   - BasicAuth mode (line 1040)
+
+Each now includes:
+```python
+transport_security=TransportSecuritySettings(enable_dns_rebinding_protection=False)
+```
+
+## Impact
+
+### ✅ What This Fixes
+
+- **Kubernetes deployments**: Requests with k8s service DNS names now work
+- **Docker deployments**: Port-mapped requests (localhost:8000 → container) now work
+- **Reverse proxy deployments**: Proxied requests with various Host headers now work
+- **Ingress controllers**: Requests via ingress hostnames now work
+
+### 🔒 Security Considerations
+
+DNS rebinding protection defends against attacks where:
+1. Attacker controls a DNS domain (e.g., `evil.com`)
+2. DNS initially resolves to attacker's IP
+3. After victim's browser caches the origin, DNS changes to victim's localhost
+4. Attacker's page can now make requests to victim's localhost services
+
+**Why it's safe to disable for this deployment:**
+
+1. **OAuth authentication required** in production deployments (ADR-002, ADR-004)
+2. **Network-level isolation** in containerized environments (k8s network policies, Docker networks)
+3. **MCP is server-to-server**, not exposed to browsers (no CORS concerns)
+4. **Host header validation inappropriate** for multi-tenant k8s environments
+
+If DNS rebinding protection is needed for specific deployments, it can be re-enabled with a custom allowed hosts list:
+
+```python
+transport_security=TransportSecuritySettings(
+    enable_dns_rebinding_protection=True,
+    allowed_hosts=[
+        "nextcloud-mcp-server.default.svc.cluster.local:*",
+        "mcp.example.com:*",
+        # Add all your expected Host header values
+    ]
+)
+```
+
+## Testing
+
+- ✅ Ruff linting passes
+- ✅ Type checking passes (pre-existing warnings unrelated)
+- ✅ Module imports successfully
+- ✅ Compatible with MCP 1.23.x
+
+## References
+
+- [MCP Python SDK 1.23.0 Release](https://github.com/modelcontextprotocol/python-sdk/releases/tag/v1.23.0)
+- Commit: `d3a1841` - "Auto-enable DNS rebinding protection for localhost servers"
+- Issue #373 (original report of k8s breakage)
+- PR #382 (MCP 1.23.x upgrade)
@@ -0,0 +1,422 @@
+# Authentication Flows by Deployment Mode
+
+This document provides a unified reference for authentication flows across all deployment modes. For configuration details, see [Authentication](authentication.md). For OAuth protocol details, see [OAuth Architecture](oauth-architecture.md).
+
+## Quick Reference Matrix
+
+| Mode | Client → MCP → NC | Background Sync | Astrolabe → MCP |
+|------|-------------------|-----------------|-----------------|
+| [Single-User BasicAuth](#1-single-user-basicauth) | Embedded credentials | Same credentials | N/A |
+| [Multi-User BasicAuth](#2-multi-user-basicauth) | Header pass-through | App password (optional) | Bearer token |
+| [OAuth Single-Audience](#3-oauth-single-audience-default) | Multi-audience token | Refresh token exchange | Bearer token |
+| [OAuth Token Exchange](#4-oauth-token-exchange-rfc-8693) | RFC 8693 exchange | Refresh token exchange | Bearer token |
+| [Smithery Stateless](#5-smithery-stateless) | Session parameters | Not supported | N/A |
+
+## Communication Patterns
+
+This document covers three distinct communication patterns:
+
+1. **MCP Client → MCP Server → Nextcloud**: Interactive tool calls initiated by users through MCP clients (Claude Desktop, etc.)
+2. **MCP Server → Nextcloud**: Background operations like vector sync that run without user interaction
+3. **Astrolabe → MCP Server**: Nextcloud app backend communication for settings UI and unified search
+
+---
+
+## Deployment Modes
+
+### 1. Single-User BasicAuth
+
+**Use Case:** Personal Nextcloud instance, local development, single-user deployments.
+
+#### MCP Client → MCP Server → Nextcloud
+
+```
+MCP Client                    MCP Server                   Nextcloud
+    │                             │                            │
+    │── MCP Request ─────────────▶│                            │
+    │   (no auth required)        │                            │
+    │                             │── HTTP + BasicAuth ───────▶│
+    │                             │   Authorization: Basic     │
+    │                             │   (embedded credentials)   │
+    │                             │◀── API Response ───────────│
+    │◀── Tool Result ─────────────│                            │
+```
+
+**Key characteristics:**
+- Credentials embedded in server configuration (`NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`)
+- Single shared `NextcloudClient` created at startup
+- No MCP-level authentication required (server trusts local clients)
+- All requests use the same Nextcloud user
+
+**Implementation:** `context.py:78-79` - Returns shared client from lifespan context
+
+#### Background Sync
+
+Uses the same embedded credentials as interactive requests. The background job accesses Nextcloud with the configured username/password.
+
+**Implementation:** Background jobs use `get_settings()` to access credentials
+
+#### Astrolabe Integration
+
+Not applicable - Astrolabe is only used in multi-user deployments where users need personal settings and token management.
+
+---
+
+### 2. Multi-User BasicAuth
+
+**Use Case:** Internal deployment where users provide their own credentials via HTTP headers.
+
+#### MCP Client → MCP Server → Nextcloud
+
+```
+MCP Client                    MCP Server                   Nextcloud
+    │                             │                            │
+    │── MCP Request ─────────────▶│                            │
+    │   Authorization: Basic      │                            │
+    │   (user credentials)        │                            │
+    │                             │── BasicAuthMiddleware ────▶│
+    │                             │   Extracts credentials     │
+    │                             │                            │
+    │                             │── HTTP + BasicAuth ───────▶│
+    │                             │   (pass-through)           │
+    │                             │◀── API Response ───────────│
+    │◀── Tool Result ─────────────│                            │
+```
+
+**Key characteristics:**
+- `BasicAuthMiddleware` extracts credentials from `Authorization: Basic` header
+- Credentials passed through to Nextcloud (not stored)
+- Client created per-request from extracted credentials
+- Stateless - no credential storage between requests
+
+**Implementation:** `context.py:187-248` - `_get_client_from_basic_auth()` extracts credentials from request state
+
+#### Background Sync (Optional)
+
+Requires `ENABLE_OFFLINE_ACCESS=true`. Users can store app passwords via Astrolabe for background operations.
+
+```
+Astrolabe                     MCP Server                   Nextcloud
+    │                             │                            │
+    │── Store App Password ──────▶│                            │
+    │   (via management API)      │                            │
+    │                             │── Store in SQLite ────────▶│
+    │                             │   (encrypted)              │
+    │◀── Confirmation ────────────│                            │
+    │                             │                            │
+    │         [Background Job]    │                            │
+    │                             │── Retrieve app password ──▶│
+    │                             │   (from encrypted storage) │
+    │                             │── HTTP + BasicAuth ───────▶│
+    │                             │   (stored app password)    │
+    │                             │◀── API Response ───────────│
+```
+
+**Requirements:**
+- `ENABLE_OFFLINE_ACCESS=true`
+- `TOKEN_ENCRYPTION_KEY` for credential encryption
+- `TOKEN_STORAGE_DB` for SQLite storage path
+
+#### Astrolabe → MCP Server
+
+```
+Astrolabe                     MCP Server                   Nextcloud OIDC
+    │                             │                            │
+    │── OAuth Flow ──────────────▶│◀── Token from IdP ────────▶│
+    │   (user initiates)          │                            │
+    │                             │                            │
+    │── Bearer Token ────────────▶│                            │
+    │   (management API calls)    │                            │
+    │                             │── Validate via JWKS ──────▶│
+    │                             │   (or introspection)       │
+    │◀── API Response ────────────│                            │
+```
+
+**Key characteristics:**
+- Astrolabe has its own OAuth client (`astrolabe_client_id` in Nextcloud config)
+- Tokens are validated by MCP server using Nextcloud OIDC JWKS
+- Authorization check: `token.sub == requested_resource_owner`
+- Any valid Nextcloud OIDC token accepted (relaxed audience validation per ADR-018)
+
+**Implementation:** `unified_verifier.py:120-183` - `verify_token_for_management_api()` validates without strict audience check
+
+---
+
+### 3. OAuth Single-Audience (Default)
+
+**Use Case:** Multi-user deployment with OAuth authentication. Tokens work for both MCP and Nextcloud.
+
+This is the default mode when `NEXTCLOUD_USERNAME`/`NEXTCLOUD_PASSWORD` are not set.
+
+#### MCP Client → MCP Server → Nextcloud
+
+```
+MCP Client                    MCP Server                   Nextcloud
+    │                             │                            │
+    │── Bearer Token ────────────▶│                            │
+    │   aud: ["mcp-server",       │                            │
+    │         "nextcloud"]        │                            │
+    │                             │── Validate MCP audience ──▶│
+    │                             │   (UnifiedTokenVerifier)   │
+    │                             │                            │
+    │                             │── HTTP + Same Token ──────▶│
+    │                             │   Authorization: Bearer    │
+    │                             │   (multi-audience token)   │
+    │                             │                            │
+    │                             │   NC validates its own aud │
+    │                             │◀── API Response ───────────│
+    │◀── Tool Result ─────────────│                            │
+```
+
+**Key characteristics:**
+- Token contains both audiences: `aud: ["mcp-server", "nextcloud"]`
+- MCP server validates only MCP audience (per RFC 7519)
+- Nextcloud independently validates its own audience
+- No token exchange needed - same token used throughout
+- Stateless operation for interactive requests
+
+**Token validation flow:**
+1. `UnifiedTokenVerifier.verify_token()` validates MCP audience
+2. Token passed directly to Nextcloud via `get_client_from_context()`
+3. Nextcloud validates its own audience when receiving API calls
+
+**Implementation:**
+- `unified_verifier.py:185-252` - `_verify_mcp_audience()` validates MCP audience only
+- `context.py:96-99` - Uses token directly in multi-audience mode
+
+#### Background Sync
+
+Requires `ENABLE_OFFLINE_ACCESS=true`. Uses stored refresh tokens to obtain access tokens for background operations.
+
+```
+                              MCP Server                   Nextcloud OIDC
+                                  │                            │
+    [Background Job starts]       │                            │
+                                  │── Get refresh token ──────▶│
+                                  │   (from encrypted storage) │
+                                  │                            │
+                                  │── Token refresh request ──▶│
+                                  │   grant_type=refresh_token │
+                                  │   scope=openid profile ... │
+                                  │◀── New access + refresh ───│
+                                  │   (rotation)               │
+                                  │                            │
+                                  │── Store rotated refresh ──▶│
+                                  │   (encrypted)              │
+                                  │                            │
+                                  │── HTTP + Access Token ────▶│
+                                  │   Authorization: Bearer    │
+                                  │◀── API Response ───────────│
+```
+
+**Key characteristics:**
+- Refresh tokens stored encrypted in SQLite (`TOKEN_STORAGE_DB`)
+- Nextcloud OIDC rotates refresh tokens on every use (one-time use)
+- `TokenBrokerService` handles token lifecycle
+- Per-user locking prevents race conditions during concurrent refresh
+
+**Implementation:**
+- `token_broker.py:269-362` - `get_background_token()` handles refresh with locking
+- `token_broker.py:428-509` - `_refresh_access_token_with_scopes()` exchanges refresh token
+
+#### Astrolabe → MCP Server
+
+Same as Multi-User BasicAuth. See [Astrolabe → MCP Server](#astrolabe--mcp-server) above.
+
+---
+
+### 4. OAuth Token Exchange (RFC 8693)
+
+**Use Case:** Multi-user deployment where MCP tokens are separate from Nextcloud tokens. Provides stronger security boundaries.
+
+Enabled by `ENABLE_TOKEN_EXCHANGE=true`.
+
+#### MCP Client → MCP Server → Nextcloud
+
+```
+MCP Client                    MCP Server                   Nextcloud OIDC
+    │                             │                            │
+    │── Bearer Token ────────────▶│                            │
+    │   aud: "mcp-server"         │                            │
+    │   (MCP audience only)       │                            │
+    │                             │── Validate MCP audience ──▶│
+    │                             │                            │
+    │                             │── RFC 8693 Exchange ──────▶│
+    │                             │   grant_type=              │
+    │                             │     urn:ietf:params:oauth: │
+    │                             │     grant-type:token-exchange
+    │                             │   subject_token=<mcp-token>│
+    │                             │   requested_audience=      │
+    │                             │     "nextcloud"            │
+    │                             │◀── Delegated Token ────────│
+    │                             │   aud: "nextcloud"         │
+    │                             │                            │
+    │                             │── HTTP + Delegated Token ─▶│
+    │                             │   Authorization: Bearer    │
+    │                             │◀── API Response ───────────│
+    │◀── Tool Result ─────────────│                            │
+```
+
+**Key characteristics:**
+- Strict audience separation: MCP token has `aud: "mcp-server"` only
+- Server exchanges for Nextcloud-audience token on each request
+- Ephemeral delegated tokens (not cached by default)
+- Strongest security boundary between MCP and Nextcloud access
+
+**Token exchange details:**
+- Uses RFC 8693 "urn:ietf:params:oauth:grant-type:token-exchange"
+- Subject token: MCP access token
+- Requested audience: Nextcloud resource URI
+- Result: Short-lived token scoped for Nextcloud
+
+**Implementation:**
+- `token_broker.py:220-267` - `get_session_token()` performs on-demand exchange
+- `token_exchange.py` - `exchange_token_for_delegation()` implements RFC 8693
+- `context.py:88-94` - Routes to session client in exchange mode
+
+#### Background Sync
+
+Same as OAuth Single-Audience. Uses stored refresh tokens from Flow 2 provisioning.
+
+```
+                              MCP Server                   Nextcloud OIDC
+                                  │                            │
+    [User provisions access]      │                            │
+                                  │── Flow 2 OAuth ───────────▶│
+                                  │   client_id="mcp-server"   │
+                                  │   scope=offline_access ... │
+                                  │◀── Refresh Token ──────────│
+                                  │   (stored encrypted)       │
+                                  │                            │
+    [Background Job runs later]   │                            │
+                                  │── Refresh for background ─▶│
+                                  │   (same as single-audience)│
+```
+
+**Key difference from interactive:**
+- Interactive: On-demand token exchange per request
+- Background: Uses pre-provisioned refresh tokens (Flow 2)
+
+#### Astrolabe → MCP Server
+
+Same as Multi-User BasicAuth. See [Astrolabe → MCP Server](#astrolabe--mcp-server) above.
+
+---
+
+### 5. Smithery Stateless
+
+**Use Case:** Multi-tenant SaaS deployment via Smithery platform. Fully stateless.
+
+Enabled by `SMITHERY_DEPLOYMENT=true`.
+
+#### MCP Client → MCP Server → Nextcloud
+
+```
+MCP Client                    MCP Server                   Nextcloud
+    │                             │                            │
+    │── SSE Connect ─────────────▶│                            │
+    │   ?nextcloud_url=...        │                            │
+    │   &username=...             │                            │
+    │   &app_password=...         │                            │
+    │                             │── SmitheryConfigMiddleware │
+    │                             │   Extract URL params       │
+    │                             │                            │
+    │── MCP Request ─────────────▶│                            │
+    │   (no Authorization header) │                            │
+    │                             │── Create per-request ─────▶│
+    │                             │   NextcloudClient          │
+    │                             │                            │
+    │                             │── HTTP + BasicAuth ───────▶│
+    │                             │   (from session params)    │
+    │                             │◀── API Response ───────────│
+    │◀── Tool Result ─────────────│                            │
+```
+
+**Key characteristics:**
+- Configuration passed via URL query parameters (Smithery `configSchema`)
+- No persistent state - client created fresh per request
+- No OAuth infrastructure
+- No background sync support (stateless)
+- No admin UI available
+
+**Required session parameters:**
+- `nextcloud_url`: Nextcloud instance URL
+- `username`: Nextcloud username
+- `app_password`: Nextcloud app password
+
+**Implementation:** `context.py:108-184` - `_get_client_from_session_config()` creates client from session params
+
+#### Background Sync
+
+Not supported. Smithery mode is fully stateless with no credential storage.
+
+#### Astrolabe Integration
+
+Not applicable. Smithery deployments don't integrate with Astrolabe.
+
+---
+
+## Configuration Quick Reference
+
+### Single-User BasicAuth
+```bash
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+```
+
+### Multi-User BasicAuth
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+ENABLE_MULTI_USER_BASIC_AUTH=true
+
+# Optional: For background sync
+ENABLE_OFFLINE_ACCESS=true
+TOKEN_ENCRYPTION_KEY=<32-byte-key>
+TOKEN_STORAGE_DB=/data/tokens.db
+```
+
+### OAuth Single-Audience (Default)
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+# No username/password triggers OAuth mode
+
+# Optional: Static client credentials (instead of DCR)
+NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
+NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
+
+# Optional: For background sync
+ENABLE_OFFLINE_ACCESS=true
+TOKEN_ENCRYPTION_KEY=<32-byte-key>
+TOKEN_STORAGE_DB=/data/tokens.db
+```
+
+### OAuth Token Exchange
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+ENABLE_TOKEN_EXCHANGE=true
+NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
+NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
+
+# Optional: For background sync
+ENABLE_OFFLINE_ACCESS=true
+TOKEN_ENCRYPTION_KEY=<32-byte-key>
+TOKEN_STORAGE_DB=/data/tokens.db
+```
+
+### Smithery Stateless
+```bash
+SMITHERY_DEPLOYMENT=true
+# All other config comes from session URL parameters
+```
+
+---
+
+## Related Documentation
+
+- [Authentication](authentication.md) - Configuration details and setup guides
+- [OAuth Architecture](oauth-architecture.md) - Deep OAuth protocol details
+- [ADR-004: Progressive Consent](ADR-004-mcp-application-oauth.md) - Dual OAuth flow architecture
+- [ADR-005: Token Audience Validation](ADR-005-token-audience-validation.md) - Audience validation strategy
+- [ADR-018: Nextcloud PHP App](ADR-018-nextcloud-php-app-for-settings-ui.md) - Astrolabe integration
+- [ADR-020: Deployment Modes](ADR-020-deployment-modes-and-configuration-validation.md) - Mode detection and validation
@@ -140,6 +140,97 @@ Basic Authentication uses username and password credentials directly.
 - [Configuration](configuration.md#basic-authentication-legacy) - BasicAuth environment variables
 - [Running the Server](running.md#basicauth-mode-legacy) - BasicAuth examples

+## Hybrid Authentication (Multi-User BasicAuth + OAuth)
+
+When running in multi-user BasicAuth mode with `ENABLE_OFFLINE_ACCESS=true`, the server operates in **hybrid authentication mode**. This provides the simplicity of BasicAuth for normal operations with the security of OAuth for administrative functions.
+
+### Authentication Domains
+
+**MCP Operations** (Tools, Resources):
+- **Auth Method**: BasicAuth (HTTP Basic username/password)
+- **Characteristics**:
+  - Stateless - no token storage
+  - Simple configuration
+  - Direct credential validation against Nextcloud
+  - Credentials passed per-request in Authorization header
+- **Used For**: MCP tool calls from Claude, MCP client operations
+
+**Management APIs** (Webhooks, Admin UI):
+- **Auth Method**: OAuth bearer tokens
+- **Characteristics**:
+  - Per-user authorization via OAuth consent flow
+  - Refresh tokens stored for background operations
+  - Token validation via UnifiedTokenVerifier
+  - Explicit user consent required
+- **Used For**: Astrolabe admin UI, webhook management, vector sync operations
+
+### Configuration
+
+```env
+# Enable multi-user BasicAuth
+ENABLE_MULTI_USER_BASIC_AUTH=true
+
+# Enable hybrid mode (OAuth provisioning for management APIs)
+ENABLE_OFFLINE_ACCESS=true
+
+# Enable background sync (required for hybrid mode currently)
+VECTOR_SYNC_ENABLED=true
+
+# Encryption key for refresh token storage
+TOKEN_ENCRYPTION_KEY=<base64-encoded-key>
+
+# Nextcloud connection
+NEXTCLOUD_HOST=https://cloud.example.com
+
+# OAuth credentials (optional - uses DCR if not set)
+NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
+NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
+```
+
+### OAuth Provisioning Flow
+
+1. Admin opens Astrolabe admin settings in Nextcloud
+2. Clicks "Authorize" to enable webhook management
+3. Redirected to `/oauth/authorize-nextcloud` on MCP server
+4. MCP server redirects to Nextcloud OAuth consent page
+5. Admin grants OAuth consent (scopes: `openid`, `profile`, `offline_access`)
+6. Redirected back to `/oauth/callback` on MCP server
+7. MCP server stores refresh token (encrypted)
+8. Admin can now manage webhooks from Astrolabe UI
+
+### Benefits
+
+- **Simple MCP client setup**: Use BasicAuth (no OAuth complexity for end users)
+- **Secure background operations**: Webhooks use per-user OAuth tokens (no shared credentials)
+- **Explicit authorization**: Admins must explicitly grant OAuth consent for webhook operations
+- **Per-user isolation**: Each admin's webhook operations use their own refresh token
+
+### Trade-offs
+
+- **Two auth systems**: More complex server configuration than pure BasicAuth or OAuth
+- **OAuth setup required**: Admins must complete OAuth flow before managing webhooks
+- **Token storage**: Requires database and encryption key for refresh tokens
+
+### Comparison
+
+| Feature | Pure BasicAuth | Hybrid Mode | Pure OAuth |
+|---------|---------------|-------------|------------|
+| MCP Operations | BasicAuth | BasicAuth | OAuth Bearer Token |
+| Management API | N/A | OAuth Bearer Token | OAuth Bearer Token |
+| Webhook Operations | N/A | OAuth Refresh Token | OAuth Refresh Token |
+| MCP Client Setup | Simple | Simple | Complex (PKCE flow) |
+| Admin UI Auth | N/A | OAuth Consent | OAuth Login |
+| Token Storage | None | Refresh tokens only | All tokens |
+| Deployment Complexity | Low | Medium | High |
+
+### Astrolabe User Setup (Hybrid Mode)
+
+For Astrolabe-specific user setup instructions in hybrid mode, see the [Astrolabe documentation](https://github.com/cbcoutinho/astrolabe/blob/master/docs/user-setup-hybrid-mode.md).
+
+### See Also
+- [OAuth Architecture](oauth-architecture.md) - Progressive Consent (Flow 2) details
+- [Configuration](configuration.md#enable_offline_access) - Hybrid mode configuration
+
 ## Mode Detection

 The server automatically detects the authentication mode:
@@ -0,0 +1,338 @@
+# Amazon Bedrock Setup Guide
+
+This guide covers how to configure the Nextcloud MCP Server to use Amazon Bedrock for embeddings and text generation.
+
+## Prerequisites
+
+1. **AWS Account** with access to Amazon Bedrock
+2. **boto3 library** installed: `pip install boto3` or `uv sync --group dev`
+3. **Model Access** - Request access to models in AWS Bedrock console
+
+## Required AWS Permissions
+
+### IAM Policy for Bedrock Access
+
+The AWS IAM user or role needs the following permissions:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockInvokeModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel",
+        "bedrock:InvokeModelWithResponseStream"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+### Minimal Permissions (Production)
+
+For production deployments, restrict to specific models:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockEmbeddings",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v2:0"
+      ]
+    },
+    {
+      "Sid": "BedrockGeneration",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModel"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0"
+      ]
+    }
+  ]
+}
+```
+
+### Additional Permissions (Optional)
+
+For advanced use cases:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "BedrockListModels",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:ListFoundationModels",
+        "bedrock:GetFoundationModel"
+      ],
+      "Resource": "*"
+    },
+    {
+      "Sid": "BedrockAsyncInvoke",
+      "Effect": "Allow",
+      "Action": [
+        "bedrock:InvokeModelAsync",
+        "bedrock:GetAsyncInvoke",
+        "bedrock:ListAsyncInvokes"
+      ],
+      "Resource": [
+        "arn:aws:bedrock:*::foundation-model/*"
+      ]
+    }
+  ]
+}
+```
+
+## Model Access
+
+Before using Bedrock models, you must request access in the AWS Console:
+
+1. Navigate to **Amazon Bedrock** → **Model access**
+2. Click **Manage model access**
+3. Select models you want to use:
+   - **Embeddings:** Amazon Titan Embed Text, Cohere Embed
+   - **Text Generation:** Anthropic Claude, Meta Llama, Amazon Titan Text
+4. Click **Request model access**
+5. Wait for approval (usually instant for most models)
+
+## Supported Models
+
+### Embedding Models
+
+| Provider | Model ID | Dimensions | Best For |
+|----------|----------|------------|----------|
+| Amazon Titan | `amazon.titan-embed-text-v1` | 1,536 | General purpose |
+| Amazon Titan | `amazon.titan-embed-text-v2:0` | 1,024 | Latest, improved quality |
+| Cohere | `cohere.embed-english-v3` | 1,024 | English text |
+| Cohere | `cohere.embed-multilingual-v3` | 1,024 | Multilingual |
+
+### Text Generation Models
+
+| Provider | Model ID | Context | Best For |
+|----------|----------|---------|----------|
+| Anthropic | `anthropic.claude-3-sonnet-20240229-v1:0` | 200K | Balanced performance |
+| Anthropic | `anthropic.claude-3-haiku-20240307-v1:0` | 200K | Fast, cost-effective |
+| Anthropic | `anthropic.claude-3-opus-20240229-v1:0` | 200K | Highest quality |
+| Meta | `meta.llama3-8b-instruct-v1:0` | 8K | Fast, open-source |
+| Meta | `meta.llama3-70b-instruct-v1:0` | 8K | High quality |
+| Amazon | `amazon.titan-text-express-v1` | 8K | Fast, low cost |
+| Mistral | `mistral.mistral-7b-instruct-v0:2` | 32K | Efficient |
+
+## Configuration
+
+### Environment Variables
+
+**Required:**
+```bash
+AWS_REGION=us-east-1
+```
+
+**Optional (at least one model required):**
+```bash
+# For embeddings
+BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+
+# For text generation (RAG evaluation)
+BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+```
+
+**AWS Credentials (choose one method):**
+
+**Method 1: Environment Variables**
+```bash
+AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
+AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 2: AWS Credentials File** (`~/.aws/credentials`)
+```ini
+[default]
+aws_access_key_id = AKIAIOSFODNN7EXAMPLE
+aws_secret_access_key = wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+**Method 3: IAM Role** (when running on AWS EC2/ECS/Lambda)
+- No credentials needed, uses instance/task role automatically
+
+### Docker Configuration
+
+Add to your `docker-compose.yml`:
+
+```yaml
+services:
+  mcp:
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+      - BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}
+      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}
+```
+
+Or use AWS credentials file volume mount:
+
+```yaml
+services:
+  mcp:
+    volumes:
+      - ~/.aws:/root/.aws:ro
+    environment:
+      - AWS_REGION=us-east-1
+      - BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+```
+
+## Usage Examples
+
+### Embeddings Only
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export AWS_ACCESS_KEY_ID=your-key
+export AWS_SECRET_ACCESS_KEY=your-secret
+
+uv run nextcloud-mcp-server
+```
+
+### Both Embeddings and Generation
+
+```bash
+export AWS_REGION=us-east-1
+export BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+export BEDROCK_GENERATION_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+# For RAG evaluation with Bedrock
+export RAG_EVAL_PROVIDER=bedrock
+export RAG_EVAL_BEDROCK_MODEL=anthropic.claude-3-sonnet-20240229-v1:0
+
+uv run python -m tests.rag_evaluation.evaluate
+```
+
+### Programmatic Usage
+
+```python
+from nextcloud_mcp_server.providers import BedrockProvider
+
+# Embeddings only
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+)
+
+embeddings = await provider.embed_batch(["text1", "text2"])
+
+# Both capabilities
+provider = BedrockProvider(
+    region_name="us-east-1",
+    embedding_model="amazon.titan-embed-text-v2:0",
+    generation_model="anthropic.claude-3-sonnet-20240229-v1:0",
+)
+
+# Generate embeddings
+embedding = await provider.embed("query text")
+
+# Generate text
+response = await provider.generate("Write a summary", max_tokens=500)
+```
+
+## Cost Considerations
+
+### Embedding Costs (as of Jan 2025)
+
+| Model | Price per 1K tokens |
+|-------|---------------------|
+| Titan Embed Text v2 | $0.0001 |
+| Cohere Embed English v3 | $0.0001 |
+
+### Generation Costs (as of Jan 2025)
+
+| Model | Input (per 1K tokens) | Output (per 1K tokens) |
+|-------|----------------------|------------------------|
+| Claude 3 Haiku | $0.00025 | $0.00125 |
+| Claude 3 Sonnet | $0.003 | $0.015 |
+| Claude 3 Opus | $0.015 | $0.075 |
+| Llama 3 8B | $0.0003 | $0.0006 |
+| Titan Text Express | $0.0002 | $0.0006 |
+
+**Note:** Prices vary by region. Check [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/) for current rates.
+
+## Troubleshooting
+
+### Error: "Executable doesn't exist" or boto3 not found
+
+**Solution:**
+```bash
+uv sync --group dev  # Installs boto3
+```
+
+### Error: "AccessDeniedException"
+
+**Causes:**
+1. IAM permissions missing
+2. Model access not requested
+3. Wrong AWS region
+
+**Solution:**
+1. Verify IAM policy includes `bedrock:InvokeModel`
+2. Request model access in Bedrock console
+3. Check model is available in your region
+
+### Error: "ResourceNotFoundException"
+
+**Cause:** Invalid model ID or model not available in region
+
+**Solution:**
+- Verify model ID matches exactly (case-sensitive)
+- Check model availability in your AWS region
+- Use `aws bedrock list-foundation-models` to see available models
+
+### Error: "ThrottlingException"
+
+**Cause:** Rate limit exceeded
+
+**Solution:**
+- Reduce request rate
+- Request quota increase via AWS Support
+- Use batch operations where possible
+
+## Security Best Practices
+
+1. **Use IAM Roles** when running on AWS infrastructure
+2. **Rotate Access Keys** regularly if using IAM users
+3. **Restrict Permissions** to only required models
+4. **Enable CloudTrail** for audit logging
+5. **Use AWS Secrets Manager** for credential management
+6. **Monitor Costs** with AWS Cost Explorer and Budgets
+
+## Regional Availability
+
+Amazon Bedrock is available in:
+- **US East (N. Virginia)**: `us-east-1` ✅ Most models
+- **US West (Oregon)**: `us-west-2` ✅ Most models
+- **Asia Pacific (Singapore)**: `ap-southeast-1`
+- **Asia Pacific (Tokyo)**: `ap-northeast-1`
+- **Europe (Frankfurt)**: `eu-central-1`
+
+**Note:** Model availability varies by region. Check the [AWS Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html) for current availability.
+
+## References
+
+- [AWS Bedrock Documentation](https://docs.aws.amazon.com/bedrock/)
+- [AWS Bedrock Pricing](https://aws.amazon.com/bedrock/pricing/)
+- [boto3 Bedrock Runtime API](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime.html)
+- [Provider Architecture ADR](./ADR-015-unified-provider-architecture.md)
@@ -0,0 +1,564 @@
+# Configuration Migration Guide v2
+
+**Version:** v0.58.0
+**Status:** Active
+**Related ADR:** [ADR-021: Configuration Consolidation and Simplification](ADR-021-configuration-consolidation.md)
+
+## Overview
+
+This guide helps you migrate from the old configuration variables to the new consolidated approach introduced in v0.58.0.
+
+**Key Changes:**
+- `VECTOR_SYNC_ENABLED` → `ENABLE_SEMANTIC_SEARCH`
+- `ENABLE_OFFLINE_ACCESS` → `ENABLE_BACKGROUND_OPERATIONS`
+- New: `MCP_DEPLOYMENT_MODE` for explicit mode selection
+- Automatic dependency resolution: semantic search auto-enables background operations
+
+**Backward Compatibility:**
+- Old variable names still work in v0.58.0+
+- Deprecation warnings logged when old names used
+- Old names will be removed in v1.0.0
+
+---
+
+## Quick Reference: Variable Name Changes
+
+| Old Name | New Name | Status |
+|----------|----------|--------|
+| `VECTOR_SYNC_ENABLED` | `ENABLE_SEMANTIC_SEARCH` | Deprecated |
+| `ENABLE_OFFLINE_ACCESS` | `ENABLE_BACKGROUND_OPERATIONS` | Deprecated |
+| N/A (auto-detected) | `MCP_DEPLOYMENT_MODE` | New (optional) |
+
+**Tuning parameters unchanged:**
+- `VECTOR_SYNC_SCAN_INTERVAL` - Keep as-is
+- `VECTOR_SYNC_PROCESSOR_WORKERS` - Keep as-is
+- `VECTOR_SYNC_QUEUE_MAX_SIZE` - Keep as-is
+
+---
+
+## Migration Scenarios
+
+### Scenario 1: Single-User BasicAuth with Semantic Search
+
+**Before (v0.57.x):**
+```bash
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+VECTOR_SYNC_ENABLED=true
+QDRANT_LOCATION=:memory:
+OLLAMA_BASE_URL=http://ollama:11434
+```
+
+**After (v0.58.0+):**
+```bash
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+
+# Optional: Explicit mode declaration (recommended)
+MCP_DEPLOYMENT_MODE=single_user_basic
+
+# Updated variable name
+ENABLE_SEMANTIC_SEARCH=true  # Previously VECTOR_SYNC_ENABLED
+
+QDRANT_LOCATION=:memory:
+OLLAMA_BASE_URL=http://ollama:11434
+```
+
+**What Changed:**
+- ✅ Renamed `VECTOR_SYNC_ENABLED` to `ENABLE_SEMANTIC_SEARCH`
+- ✅ Added optional `MCP_DEPLOYMENT_MODE` for clarity
+- ✅ Background operations NOT auto-enabled (not needed in single-user mode)
+
+**Migration Steps:**
+1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
+2. Optionally add `MCP_DEPLOYMENT_MODE=single_user_basic`
+3. Restart server
+4. Verify deprecation warnings are gone
+
+---
+
+### Scenario 2: Multi-User OAuth with Semantic Search
+
+**Before (v0.57.x):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+NEXTCLOUD_USERNAME=
+NEXTCLOUD_PASSWORD=
+
+# Both variables required - confusing!
+ENABLE_OFFLINE_ACCESS=true
+VECTOR_SYNC_ENABLED=true
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+QDRANT_URL=http://qdrant:6333
+OLLAMA_BASE_URL=http://ollama:11434
+NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
+NEXTCLOUD_OIDC_CLIENT_SECRET=secret
+```
+
+**After (v0.58.0+ - Simplified):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+NEXTCLOUD_USERNAME=
+NEXTCLOUD_PASSWORD=
+
+# Optional: Explicit mode declaration
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
+# One variable does it all!
+ENABLE_SEMANTIC_SEARCH=true  # Automatically enables background operations
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+QDRANT_URL=http://qdrant:6333
+OLLAMA_BASE_URL=http://ollama:11434
+NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
+NEXTCLOUD_OIDC_CLIENT_SECRET=secret
+
+# Note: ENABLE_OFFLINE_ACCESS no longer needed!
+# Background operations are auto-enabled by ENABLE_SEMANTIC_SEARCH
+```
+
+**What Changed:**
+- ✅ Removed need for explicit `ENABLE_OFFLINE_ACCESS`
+- ✅ `ENABLE_SEMANTIC_SEARCH` automatically enables background operations in multi-user modes
+- ✅ Renamed `VECTOR_SYNC_ENABLED` to `ENABLE_SEMANTIC_SEARCH`
+- ✅ Added optional explicit mode declaration
+
+**Migration Steps:**
+1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
+2. Remove `ENABLE_OFFLINE_ACCESS=true` (auto-enabled)
+3. Optionally add `MCP_DEPLOYMENT_MODE=oauth_single_audience`
+4. Restart server
+5. Check logs for confirmation: "Automatically enabled background operations for semantic search"
+
+---
+
+### Scenario 3: Multi-User OAuth WITHOUT Semantic Search
+
+**Before (v0.57.x):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+NEXTCLOUD_USERNAME=
+NEXTCLOUD_PASSWORD=
+
+# Enable background operations for future features
+ENABLE_OFFLINE_ACCESS=true
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
+NEXTCLOUD_OIDC_CLIENT_SECRET=secret
+```
+
+**After (v0.58.0+):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+NEXTCLOUD_USERNAME=
+NEXTCLOUD_PASSWORD=
+
+# Optional: Explicit mode declaration
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
+# Renamed for clarity
+ENABLE_BACKGROUND_OPERATIONS=true  # Previously ENABLE_OFFLINE_ACCESS
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
+NEXTCLOUD_OIDC_CLIENT_SECRET=secret
+```
+
+**What Changed:**
+- ✅ Renamed `ENABLE_OFFLINE_ACCESS` to `ENABLE_BACKGROUND_OPERATIONS`
+- ✅ Added optional explicit mode declaration
+
+**Migration Steps:**
+1. Replace `ENABLE_OFFLINE_ACCESS=true` with `ENABLE_BACKGROUND_OPERATIONS=true`
+2. Optionally add `MCP_DEPLOYMENT_MODE=oauth_single_audience`
+3. Restart server
+
+---
+
+### Scenario 4: Multi-User BasicAuth with Semantic Search
+
+**Before (v0.57.x):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_MULTI_USER_BASIC_AUTH=true
+
+# Both required - redundant
+ENABLE_OFFLINE_ACCESS=true
+VECTOR_SYNC_ENABLED=true
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+QDRANT_URL=http://qdrant:6333
+OLLAMA_BASE_URL=http://ollama:11434
+NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
+NEXTCLOUD_OIDC_CLIENT_SECRET=secret
+```
+
+**After (v0.58.0+ - Simplified):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_MULTI_USER_BASIC_AUTH=true
+
+# Optional: Explicit mode declaration
+MCP_DEPLOYMENT_MODE=multi_user_basic
+
+# One variable handles both!
+ENABLE_SEMANTIC_SEARCH=true  # Auto-enables background operations
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+QDRANT_URL=http://qdrant:6333
+OLLAMA_BASE_URL=http://ollama:11434
+NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
+NEXTCLOUD_OIDC_CLIENT_SECRET=secret
+
+# Note: ENABLE_OFFLINE_ACCESS no longer needed!
+```
+
+**What Changed:**
+- ✅ Semantic search auto-enables background operations
+- ✅ Removed need for explicit `ENABLE_OFFLINE_ACCESS`
+- ✅ Clearer variable naming
+
+**Migration Steps:**
+1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
+2. Remove `ENABLE_OFFLINE_ACCESS=true` (auto-enabled)
+3. Optionally add `MCP_DEPLOYMENT_MODE=multi_user_basic`
+4. Restart server
+
+---
+
+### Scenario 5: Token Exchange Mode with Semantic Search
+
+**Before (v0.57.x):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_TOKEN_EXCHANGE=true
+
+# Both required
+ENABLE_OFFLINE_ACCESS=true
+VECTOR_SYNC_ENABLED=true
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+TOKEN_EXCHANGE_CACHE_TTL=300
+QDRANT_URL=http://qdrant:6333
+OLLAMA_BASE_URL=http://ollama:11434
+```
+
+**After (v0.58.0+ - Simplified):**
+```bash
+NEXTCLOUD_HOST=https://nextcloud.example.com
+ENABLE_TOKEN_EXCHANGE=true
+
+# Optional: Explicit mode declaration
+MCP_DEPLOYMENT_MODE=oauth_token_exchange
+
+# One variable!
+ENABLE_SEMANTIC_SEARCH=true  # Auto-enables background operations
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+TOKEN_EXCHANGE_CACHE_TTL=300
+QDRANT_URL=http://qdrant:6333
+OLLAMA_BASE_URL=http://ollama:11434
+```
+
+**What Changed:**
+- ✅ Semantic search auto-enables background operations
+- ✅ Explicit mode declaration available
+
+**Migration Steps:**
+1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
+2. Remove `ENABLE_OFFLINE_ACCESS=true` (auto-enabled)
+3. Optionally add `MCP_DEPLOYMENT_MODE=oauth_token_exchange`
+4. Restart server
+
+---
+
+## Understanding Automatic Dependency Resolution
+
+### How It Works
+
+In v0.58.0+, the server uses smart dependency resolution:
+
+```python
+# In multi-user modes (OAuth, Multi-User BasicAuth):
+if ENABLE_SEMANTIC_SEARCH == true:
+    background_operations = automatically enabled
+    refresh_tokens = automatically requested
+    token_storage = required (TOKEN_ENCRYPTION_KEY, TOKEN_STORAGE_DB)
+    oauth_credentials = required (for app password retrieval)
+```
+
+**What this means:**
+- ✅ Set `ENABLE_SEMANTIC_SEARCH=true`
+- ✅ Provide required infrastructure (Qdrant, Ollama, encryption key)
+- ✅ System automatically enables background operations
+- ❌ No need to set `ENABLE_BACKGROUND_OPERATIONS` separately
+
+### When Automatic Enablement Happens
+
+| Deployment Mode | Semantic Search Enabled | Background Operations Auto-Enabled? |
+|----------------|------------------------|-----------------------------------|
+| Single-User BasicAuth | ✅ | ❌ No (not needed) |
+| Multi-User BasicAuth | ✅ | ✅ Yes |
+| OAuth Single-Audience | ✅ | ✅ Yes |
+| OAuth Token Exchange | ✅ | ✅ Yes |
+| Smithery Stateless | N/A (not supported) | N/A |
+
+### When to Explicitly Set ENABLE_BACKGROUND_OPERATIONS
+
+Only needed when you want background operations **without** semantic search:
+
+```bash
+# Example: OAuth mode with background operations but NO semantic search
+NEXTCLOUD_HOST=https://nextcloud.example.com
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
+# Explicitly enable background operations for future features
+ENABLE_BACKGROUND_OPERATIONS=true
+
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+
+# Semantic search disabled
+ENABLE_SEMANTIC_SEARCH=false
+```
+
+---
+
+## Explicit Mode Selection
+
+### Why Use MCP_DEPLOYMENT_MODE?
+
+**Benefits:**
+- ✅ Removes ambiguity about which mode is active
+- ✅ Validation errors reference specific mode requirements
+- ✅ Catches configuration mistakes early
+- ✅ Self-documenting configuration
+
+**Example:**
+```bash
+# Without explicit mode:
+NEXTCLOUD_HOST=https://nextcloud.example.com
+# Is this OAuth or Multi-User BasicAuth? Not immediately clear.
+
+# With explicit mode:
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+NEXTCLOUD_HOST=https://nextcloud.example.com
+# Clear: This is OAuth mode
+```
+
+### Valid Mode Values
+
+| Mode Value | Description |
+|-----------|-------------|
+| `single_user_basic` | Single-user with username/password |
+| `multi_user_basic` | Multi-user with BasicAuth pass-through |
+| `oauth_single_audience` | Multi-user OAuth (recommended) |
+| `oauth_token_exchange` | Multi-user OAuth with token exchange |
+| `smithery` | Smithery platform deployment |
+
+### Mode Detection Priority
+
+When `MCP_DEPLOYMENT_MODE` is set:
+1. ✅ Explicit mode is used
+2. ✅ Server validates configuration matches explicit mode
+3. ❌ Auto-detection is skipped
+
+When `MCP_DEPLOYMENT_MODE` is NOT set:
+1. ✅ Auto-detection runs (existing behavior)
+2. ✅ Priority: Smithery → Token Exchange → Multi-User BasicAuth → Single-User BasicAuth → OAuth Single-Audience
+
+---
+
+## Validation and Error Messages
+
+### Old Validation (v0.57.x)
+
+```
+Error: [multi_user_basic] ENABLE_OFFLINE_ACCESS is required when VECTOR_SYNC_ENABLED is enabled
+```
+
+**Problem:** User must understand internal dependency relationship
+
+### New Validation (v0.58.0+)
+
+```
+Error: [multi_user_basic] TOKEN_ENCRYPTION_KEY is required when ENABLE_SEMANTIC_SEARCH is enabled
+```
+
+**Benefit:** Clear what's needed, no mention of internal ENABLE_BACKGROUND_OPERATIONS flag
+
+---
+
+## Troubleshooting Migration
+
+### Issue: Deprecation Warning After Migration
+
+**Symptom:**
+```
+WARNING: VECTOR_SYNC_ENABLED is deprecated. Please use ENABLE_SEMANTIC_SEARCH instead.
+```
+
+**Solution:**
+1. Check for `VECTOR_SYNC_ENABLED` in `.env` file
+2. Replace with `ENABLE_SEMANTIC_SEARCH`
+3. Search for any scripts/CI configs using old name
+4. Restart server
+
+### Issue: Both Old and New Names Set
+
+**Symptom:**
+```
+WARNING: Both ENABLE_SEMANTIC_SEARCH and VECTOR_SYNC_ENABLED are set. Using ENABLE_SEMANTIC_SEARCH.
+```
+
+**Solution:**
+1. Remove `VECTOR_SYNC_ENABLED` from `.env`
+2. Keep `ENABLE_SEMANTIC_SEARCH`
+3. Restart server
+
+### Issue: Missing Required Dependencies
+
+**Symptom:**
+```
+Error: [oauth_single_audience] TOKEN_ENCRYPTION_KEY is required when ENABLE_SEMANTIC_SEARCH is enabled
+```
+
+**Solution:**
+When semantic search is enabled in multi-user modes, you need:
+- `TOKEN_ENCRYPTION_KEY` - Generate with: `python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"`
+- `TOKEN_STORAGE_DB` - Path to SQLite database (e.g., `/app/data/tokens.db`)
+- `NEXTCLOUD_OIDC_CLIENT_ID` and `NEXTCLOUD_OIDC_CLIENT_SECRET` - For app password retrieval
+
+### Issue: Unexpected Mode Detected
+
+**Symptom:**
+Server activates `oauth_single_audience` mode when you expected `multi_user_basic`
+
+**Solution:**
+Add explicit mode declaration:
+```bash
+MCP_DEPLOYMENT_MODE=multi_user_basic
+ENABLE_MULTI_USER_BASIC_AUTH=true
+```
+
+---
+
+## Testing Your Migration
+
+### Step 1: Verify Configuration
+
+```bash
+# Set new variable names in .env
+cat .env | grep -E "(ENABLE_SEMANTIC_SEARCH|ENABLE_BACKGROUND_OPERATIONS|MCP_DEPLOYMENT_MODE)"
+```
+
+### Step 2: Check for Old Variable Names
+
+```bash
+# Should return nothing after migration
+cat .env | grep -E "(VECTOR_SYNC_ENABLED|ENABLE_OFFLINE_ACCESS)"
+```
+
+### Step 3: Start Server and Check Logs
+
+```bash
+# Start server
+docker-compose up mcp
+
+# Look for:
+# 1. No deprecation warnings
+# 2. Correct mode detected
+# 3. Auto-enablement messages (if using semantic search in multi-user mode)
+```
+
+**Expected Log Output (Multi-User OAuth + Semantic Search):**
+```
+INFO: Using explicit deployment mode: oauth_single_audience
+INFO: Automatically enabled background operations for semantic search in multi-user mode.
+INFO: Vector sync enabled. Starting background scanner...
+```
+
+### Step 4: Verify Functionality
+
+Test that existing features still work:
+- [ ] Semantic search returns results
+- [ ] Background indexing runs
+- [ ] OAuth flow completes successfully
+- [ ] Refresh tokens are stored/retrieved
+
+---
+
+## Quick Start Templates
+
+We provide mode-specific templates for new deployments:
+
+| Template | Use Case |
+|----------|----------|
+| `env.sample.single-user` | Simplest setup |
+| `env.sample.oauth-multi-user` | Recommended multi-user |
+| `env.sample.oauth-advanced` | Token exchange mode |
+
+**Usage:**
+```bash
+cp env.sample.oauth-multi-user .env
+# Edit .env with your values
+docker-compose up -d
+```
+
+---
+
+## Timeline and Support
+
+| Version | Status | Old Variable Support |
+|---------|--------|---------------------|
+| v0.57.x | Stable | Old names only |
+| v0.58.0 | Current | Both old and new (with warnings) |
+| v1.0.0 | Breaking | New names only |
+
+**Recommendation:** Migrate before v1.0.0 (12+ months minimum)
+
+---
+
+## Getting Help
+
+If you encounter issues during migration:
+
+1. **Check the logs** - Look for deprecation warnings and error messages
+2. **Review ADR-021** - See [docs/ADR-021-configuration-consolidation.md](ADR-021-configuration-consolidation.md)
+3. **Use mode-specific templates** - See `env.sample.*` files
+4. **File an issue** - Include your `.env` (redacted), logs, and mode
+
+---
+
+## Summary
+
+**What You Need to Do:**
+1. ✅ Rename `VECTOR_SYNC_ENABLED` → `ENABLE_SEMANTIC_SEARCH`
+2. ✅ (Optional) Rename `ENABLE_OFFLINE_ACCESS` → `ENABLE_BACKGROUND_OPERATIONS`
+3. ✅ (Recommended) Add `MCP_DEPLOYMENT_MODE` for clarity
+4. ✅ Remove redundant settings (semantic search auto-enables background ops in multi-user modes)
+5. ✅ Test your configuration
+
+**What the Server Does Automatically:**
+- ✅ Supports both old and new variable names
+- ✅ Logs deprecation warnings for old names
+- ✅ Auto-enables background operations when semantic search is enabled in multi-user modes
+- ✅ Validates configuration and provides clear error messages
+
+**Migration Timeline:**
+- Now → v1.0.0: Both old and new names work
+- v1.0.0+: Only new names supported
+
+**Questions?** See [docs/configuration.md](configuration.md) or file an issue.
@@ -2,25 +2,82 @@

 The Nextcloud MCP server requires configuration to connect to your Nextcloud instance. Configuration is provided through environment variables, typically stored in a `.env` file.

+> **Note:** Configuration was significantly simplified in v0.58.0. If you're upgrading from v0.57.x, see the [Configuration Migration Guide](configuration-migration-v2.md).
+
 ## Quick Start

-Create a `.env` file based on `env.sample`:
+We provide mode-specific configuration templates for quick setup:

 ```bash
+# Choose a template based on your deployment mode:
+cp env.sample.single-user .env         # Simplest - one user, local dev
+cp env.sample.oauth-multi-user .env    # Recommended - multi-user OAuth
+cp env.sample.oauth-advanced .env      # Advanced - token exchange mode
+
+# Or start from the full example:
 cp env.sample .env
+
 # Edit .env with your Nextcloud details
 ```

-Then choose your authentication mode:
+Then choose your deployment mode:

- [OAuth2/OIDC Configuration](#oauth2oidc-configuration) (Recommended)
- [Basic Authentication Configuration](#basic-authentication-legacy)
+- [Single-User BasicAuth](#single-user-basicauth-mode) - Simplest for personal instances
+- [Multi-User OAuth](#multi-user-oauth-modes) - Recommended for production
+- [Deployment Mode Selection](#deployment-mode-selection) - Explicit mode declaration

 ---

-## OAuth2/OIDC Configuration
+## Deployment Mode Selection

-OAuth2/OIDC is the recommended authentication mode for production deployments.
+**New in v0.58.0:** You can explicitly declare your deployment mode to remove ambiguity and catch configuration errors early.
+
+```dotenv
+# Optional but recommended
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+```
+
+**Valid values:**
+- `single_user_basic` - Single-user with username/password
+- `multi_user_basic` - Multi-user with BasicAuth pass-through
+- `oauth_single_audience` - Multi-user OAuth (recommended)
+- `oauth_token_exchange` - Multi-user OAuth with token exchange
+- `smithery` - Smithery platform deployment
+
+**Benefits:**
+- ✅ Clear which mode is active
+- ✅ Better validation error messages
+- ✅ Self-documenting configuration
+- ✅ Catches configuration mistakes early
+
+**Auto-detection:** If `MCP_DEPLOYMENT_MODE` is not set, the server auto-detects the mode based on other settings (existing behavior).
+
+See [Authentication Modes](authentication.md) for detailed comparison of deployment modes.
+
+---
+
+## Single-User BasicAuth Mode
+
+BasicAuth with a single user is the simplest deployment mode. Use for personal instances, local development, and testing.
+
+```dotenv
+# Minimal single-user configuration
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+
+# Optional: Explicit mode declaration
+MCP_DEPLOYMENT_MODE=single_user_basic
+```
+
+> [!WARNING]
+> **Security Notice:** BasicAuth stores credentials in environment variables and is less secure than OAuth. Use OAuth for production multi-user deployments.
+
+---
+
+## Multi-User OAuth Modes
+
+OAuth2/OIDC is the recommended authentication mode for production multi-user deployments.

 ### Minimal Configuration (Auto-registration)

@@ -28,6 +85,9 @@ OAuth2/OIDC is the recommended authentication mode for production deployments.
 # .env file for OAuth with auto-registration
 NEXTCLOUD_HOST=https://your.nextcloud.instance.com

+# Optional: Explicit mode declaration (recommended)
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
 # Leave these EMPTY for OAuth mode
 NEXTCLOUD_USERNAME=
 NEXTCLOUD_PASSWORD=
@@ -41,6 +101,9 @@ This minimal configuration uses dynamic client registration to automatically reg
 # .env file for OAuth with pre-configured client
 NEXTCLOUD_HOST=https://your.nextcloud.instance.com

+# Optional: Explicit mode declaration (recommended)
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
 # OAuth Client Credentials (optional - auto-registers if not provided)
 NEXTCLOUD_OIDC_CLIENT_ID=your-client-id
 NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
@@ -108,10 +171,104 @@ NEXTCLOUD_PASSWORD=your_app_password_or_password

 ---

+## SSL/TLS Configuration (Optional)
+
+If your Nextcloud instance uses a self-signed certificate or a private CA (common with reverse proxies like Traefik or Caddy), the MCP server will reject the connection by default. Use these settings to configure certificate verification.
+
+### Custom CA Bundle (Recommended)
+
+Point the server at your CA certificate file:
+
+```dotenv
+NEXTCLOUD_CA_BUNDLE=/etc/ssl/certs/my-ca.pem
+```
+
+With Docker, mount the certificate as a read-only volume:
+
+```bash
+docker run \
+  -v /path/to/my-ca.pem:/etc/ssl/certs/my-ca.pem:ro \
+  -e NEXTCLOUD_CA_BUNDLE=/etc/ssl/certs/my-ca.pem \
+  -e NEXTCLOUD_HOST=https://nextcloud.local \
+  --env-file .env \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
+```
+
+### Disable Verification (Development Only)
+
+> [!WARNING]
+> Disabling TLS verification is insecure. Only use this for local development or testing.
+
+```dotenv
+NEXTCLOUD_VERIFY_SSL=false
+```
+
+### Environment Variables Reference
+
+| Variable | Required | Default | Description |
+|----------|----------|---------|-------------|
+| `NEXTCLOUD_VERIFY_SSL` | ⚠️ Optional | `true` | Set to `false` to disable TLS certificate verification |
+| `NEXTCLOUD_CA_BUNDLE` | ⚠️ Optional | - | Path to a PEM CA bundle file for custom certificate authorities |
+
+### Scope
+
+These settings apply to **all** outbound connections to Nextcloud and its OIDC endpoints, including:
+
+- Nextcloud API calls (Notes, Calendar, Contacts, WebDAV, etc.)
+- OIDC discovery and token endpoints
+- OAuth client registration (DCR)
+- Health checks
+
+They do **not** affect connections to internal services (Ollama, Qdrant, Unstructured) which have their own SSL configuration.
+
+---
+
 ## Semantic Search Configuration (Optional)

+**New in v0.58.0:** Simplified semantic search configuration with automatic dependency resolution.
+
 The MCP server includes semantic search capabilities powered by vector embeddings. This feature requires a vector database (Qdrant) and an embedding service.

+### Quick Start
+
+**Single-User Mode:**
+```dotenv
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+
+# Enable semantic search
+ENABLE_SEMANTIC_SEARCH=true
+
+# Vector database
+QDRANT_LOCATION=:memory:
+
+# Embedding provider
+OLLAMA_BASE_URL=http://ollama:11434
+```
+
+**Multi-User OAuth Mode:**
+```dotenv
+NEXTCLOUD_HOST=https://nextcloud.example.com
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
+# Enable semantic search
+# In multi-user modes, this AUTOMATICALLY enables background operations!
+ENABLE_SEMANTIC_SEARCH=true
+
+# Required for background operations (auto-enabled by semantic search)
+TOKEN_ENCRYPTION_KEY=your-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+
+# Vector database
+QDRANT_URL=http://qdrant:6333
+
+# Embedding provider
+OLLAMA_BASE_URL=http://ollama:11434
+```
+
+> **Note:** In multi-user modes (OAuth, Multi-User BasicAuth), enabling `ENABLE_SEMANTIC_SEARCH` automatically enables background operations and refresh token storage. You don't need to set `ENABLE_BACKGROUND_OPERATIONS` separately!
+
 ### Qdrant Vector Database Modes

 The server supports three Qdrant deployment modes:
@@ -126,7 +283,7 @@ No configuration needed! If neither `QDRANT_URL` nor `QDRANT_LOCATION` is set, t

 ```dotenv
 # No Qdrant configuration needed - defaults to :memory:
-VECTOR_SYNC_ENABLED=true
+ENABLE_SEMANTIC_SEARCH=true
 ```

 **Pros:**
@@ -145,7 +302,7 @@ For single-instance deployments that need persistence without a separate Qdrant
 ```dotenv
 # Local persistent storage
 QDRANT_LOCATION=/app/data/qdrant  # Or any writable path
-VECTOR_SYNC_ENABLED=true
+ENABLE_SEMANTIC_SEARCH=true
 ```

 **Pros:**
@@ -166,7 +323,7 @@ For production deployments with a dedicated Qdrant service:
 QDRANT_URL=http://qdrant:6333
 QDRANT_API_KEY=your-secret-api-key  # Optional
 QDRANT_COLLECTION=nextcloud_content  # Optional
-VECTOR_SYNC_ENABLED=true
+ENABLE_SEMANTIC_SEARCH=true
 ```

 **Pros:**
@@ -283,13 +440,15 @@ Solutions:
 - Data corruption in Qdrant
 - Confusing error messages during indexing

-### Vector Sync Configuration
+### Background Indexing Configuration

 Control background indexing behavior:

 ```dotenv
-# Vector sync settings (ADR-007)
-VECTOR_SYNC_ENABLED=true              # Enable background indexing
+# Semantic search (ADR-007, ADR-021)
+ENABLE_SEMANTIC_SEARCH=true           # Enable background indexing
+
+# Tuning parameters (advanced - only modify if needed)
 VECTOR_SYNC_SCAN_INTERVAL=300         # Scan interval in seconds (default: 5 minutes)
 VECTOR_SYNC_PROCESSOR_WORKERS=3       # Concurrent indexing workers (default: 3)
 VECTOR_SYNC_QUEUE_MAX_SIZE=10000      # Max queued documents (default: 10000)
@@ -299,6 +458,8 @@ DOCUMENT_CHUNK_SIZE=512               # Words per chunk (default: 512)
 DOCUMENT_CHUNK_OVERLAP=50             # Overlapping words between chunks (default: 50)
 ```

+> **Note:** The `VECTOR_SYNC_*` tuning parameters keep their names as they're implementation details. Only the user-facing feature flag was renamed to `ENABLE_SEMANTIC_SEARCH`.
+
 ### Embedding Service Configuration

 The server uses an embedding service to generate vector representations. Two options are available:
@@ -369,11 +530,11 @@ DOCUMENT_CHUNK_OVERLAP=100

 | Variable | Required | Default | Description |
 |----------|----------|---------|-------------|
+| `ENABLE_SEMANTIC_SEARCH` | ⚠️ Optional | `false` | Enable semantic search with background indexing (replaces `VECTOR_SYNC_ENABLED`) |
 | `QDRANT_URL` | ⚠️ Optional | - | Qdrant service URL (network mode) - mutually exclusive with `QDRANT_LOCATION` |
 | `QDRANT_LOCATION` | ⚠️ Optional | `:memory:` | Local Qdrant path (`:memory:` or `/path/to/data`) - mutually exclusive with `QDRANT_URL` |
 | `QDRANT_API_KEY` | ⚠️ Optional | - | Qdrant API key (network mode only) |
-| `QDRANT_COLLECTION` | ⚠️ Optional | `nextcloud_content` | Qdrant collection name |
-| `VECTOR_SYNC_ENABLED` | ⚠️ Optional | `false` | Enable background vector indexing |
+| `QDRANT_COLLECTION` | ⚠️ Optional | Auto-generated | Qdrant collection name |
 | `VECTOR_SYNC_SCAN_INTERVAL` | ⚠️ Optional | `300` | Document scan interval (seconds) |
 | `VECTOR_SYNC_PROCESSOR_WORKERS` | ⚠️ Optional | `3` | Concurrent indexing workers |
 | `VECTOR_SYNC_QUEUE_MAX_SIZE` | ⚠️ Optional | `10000` | Max queued documents |
@@ -383,6 +544,9 @@ DOCUMENT_CHUNK_OVERLAP=100
 | `DOCUMENT_CHUNK_SIZE` | ⚠️ Optional | `512` | Words per chunk for document embedding |
 | `DOCUMENT_CHUNK_OVERLAP` | ⚠️ Optional | `50` | Overlapping words between chunks (must be < chunk size) |

+**Deprecated variables (still functional):**
+- `VECTOR_SYNC_ENABLED` - Use `ENABLE_SEMANTIC_SEARCH` instead (will be removed in v1.0.0)
+
 ### Docker Compose Example

 Enable network mode Qdrant with docker-compose:
@@ -392,7 +556,7 @@ services:
  mcp:
    environment:
      - QDRANT_URL=http://qdrant:6333
-      - VECTOR_SYNC_ENABLED=true
+      - ENABLE_SEMANTIC_SEARCH=true

  qdrant:
    image: qdrant/qdrant:latest
@@ -545,6 +709,7 @@ uv run nextcloud-mcp-server --no-oauth \

 ## See Also

+- [Configuration Migration Guide v2](configuration-migration-v2.md) - **New in v0.58.0:** Migrate from old variable names
 - [OAuth Quick Start](quickstart-oauth.md) - 5-minute OAuth setup for development
 - [OAuth Setup Guide](oauth-setup.md) - Detailed OAuth configuration for production
 - [OAuth Architecture](oauth-architecture.md) - How OAuth works in the MCP server
@@ -553,3 +718,4 @@ uv run nextcloud-mcp-server --no-oauth \
 - [Running the Server](running.md) - Starting the server with different configurations
 - [Troubleshooting](troubleshooting.md) - Common configuration issues
 - [OAuth Troubleshooting](oauth-troubleshooting.md) - OAuth-specific troubleshooting
+- [ADR-021](ADR-021-configuration-consolidation.md) - Configuration consolidation architecture decision
@@ -0,0 +1,301 @@
+# Database Migrations
+
+This document describes the database migration system for nextcloud-mcp-server's token storage database.
+
+## Overview
+
+The token storage database uses [Alembic](https://alembic.sqlalchemy.org/) for schema versioning and migrations. Alembic provides:
+
+- **Version Control**: Track schema changes in Git
+- **Rollback Support**: Safely downgrade schema if needed
+- **Audit Trail**: Migration files serve as schema changelog
+- **Automated Upgrades**: Database schema updates automatically on startup
+
+## Architecture
+
+### Migration Strategy
+
+The system handles three scenarios:
+
+1. **New Database**: Runs migrations from scratch to create all tables
+2. **Pre-Alembic Database**: Stamps existing database with initial revision (no changes)
+3. **Alembic-Managed Database**: Upgrades to latest version automatically
+
+### Directory Structure
+
+```
+nextcloud-mcp-server/
+├── alembic/                              # Alembic migrations
+│   ├── versions/                         # Migration scripts
+│   │   └── 20251217_2200_001_initial_schema.py
+│   ├── env.py                            # Alembic environment
+│   ├── script.py.mako                    # Migration template
+│   └── README                            # Migration usage guide
+├── alembic.ini                           # Alembic configuration
+└── nextcloud_mcp_server/
+    ├── auth/storage.py                   # Uses migrations on init
+    └── migrations.py                     # Migration utilities
+```
+
+## Usage
+
+### Automatic Migration on Startup
+
+Migrations run automatically when the server starts:
+
+```bash
+uv run nextcloud-mcp-server
+```
+
+The `RefreshTokenStorage.initialize()` method:
+1. Checks if database is Alembic-managed
+2. Stamps pre-Alembic databases with initial revision
+3. Upgrades to latest version
+
+### Manual Migration Commands
+
+```bash
+# Show current database version
+uv run nextcloud-mcp-server db current
+
+# Upgrade database to latest version
+uv run nextcloud-mcp-server db upgrade
+
+# Show migration history
+uv run nextcloud-mcp-server db history
+
+# Downgrade by one version (emergency use only)
+uv run nextcloud-mcp-server db downgrade
+
+# Specify custom database path
+uv run nextcloud-mcp-server db current -d /path/to/tokens.db
+```
+
+### Environment Variables
+
+- `TOKEN_STORAGE_DB`: Path to database file (default: `/app/data/tokens.db`)
+
+## Creating Migrations (Developers)
+
+### Step 1: Create Migration File
+
+```bash
+uv run nextcloud-mcp-server db migrate "add user preferences table"
+```
+
+This creates a new migration file in `alembic/versions/` with empty `upgrade()` and `downgrade()` functions.
+
+### Step 2: Write Migration SQL
+
+Since we don't use SQLAlchemy models, write raw SQL:
+
+```python
+def upgrade() -> None:
+    """Add user preferences table."""
+    op.execute("""
+        CREATE TABLE user_preferences (
+            user_id TEXT PRIMARY KEY,
+            theme TEXT DEFAULT 'light',
+            language TEXT DEFAULT 'en',
+            created_at INTEGER NOT NULL
+        )
+    """)
+
+    op.execute("""
+        CREATE INDEX idx_user_preferences_user_id
+        ON user_preferences(user_id)
+    """)
+
+
+def downgrade() -> None:
+    """Remove user preferences table."""
+    op.execute("DROP INDEX IF EXISTS idx_user_preferences_user_id")
+    op.execute("DROP TABLE IF EXISTS user_preferences")
+```
+
+### Step 3: Test Migration
+
+```bash
+# Test upgrade
+uv run nextcloud-mcp-server db upgrade -d /tmp/test.db
+
+# Verify schema
+sqlite3 /tmp/test.db ".schema"
+
+# Test downgrade
+uv run nextcloud-mcp-server db downgrade -d /tmp/test.db
+
+# Verify removal
+sqlite3 /tmp/test.db ".schema"
+```
+
+### Step 4: Commit Migration
+
+```bash
+git add alembic/versions/YYYYMMDD_HHMM_XXX_description.py
+git commit -m "feat: add user preferences table migration"
+```
+
+## SQLite Limitations
+
+SQLite has limited `ALTER TABLE` support:
+
+### Supported Operations
+
+- ✅ Add columns: `ALTER TABLE table ADD COLUMN ...`
+- ✅ Rename table: `ALTER TABLE old RENAME TO new`
+- ✅ Rename column: `ALTER TABLE table RENAME COLUMN old TO new` (SQLite 3.25+)
+
+### Unsupported Operations (Requires Table Recreation)
+
+- ❌ Drop column
+- ❌ Change column type
+- ❌ Add constraints to existing columns
+
+### Table Recreation Pattern
+
+For complex schema changes:
+
+```python
+def upgrade() -> None:
+    # Create new table with desired schema
+    op.execute("""
+        CREATE TABLE refresh_tokens_new (
+            user_id TEXT PRIMARY KEY,
+            encrypted_token BLOB NOT NULL,
+            new_field TEXT,  -- New column
+            expires_at INTEGER,
+            created_at INTEGER NOT NULL
+        )
+    """)
+
+    # Copy data from old table
+    op.execute("""
+        INSERT INTO refresh_tokens_new
+        (user_id, encrypted_token, expires_at, created_at)
+        SELECT user_id, encrypted_token, expires_at, created_at
+        FROM refresh_tokens
+    """)
+
+    # Drop old table and rename new table
+    op.execute("DROP TABLE refresh_tokens")
+    op.execute("ALTER TABLE refresh_tokens_new RENAME TO refresh_tokens")
+
+    # Recreate indexes
+    op.execute("CREATE INDEX idx_user_id ON refresh_tokens(user_id)")
+```
+
+## Best Practices
+
+### Naming Conventions
+
+- **Migrations**: `YYYYMMDD_HHMM_XXX_description.py`
+- **Revision IDs**: Sequential numbers (`001`, `002`, `003`)
+- **Descriptions**: Imperative mood ("add table", "remove column")
+
+### Migration Guidelines
+
+1. **Test Thoroughly**: Test both upgrade and downgrade paths
+2. **Preserve Data**: Ensure data migration logic is correct
+3. **Document Changes**: Add comments explaining complex operations
+4. **Small Changes**: One logical change per migration
+5. **No Breaking Changes**: Maintain backward compatibility when possible
+
+### Downgrade Considerations
+
+- **Data Loss**: Downgrade may lose data (dropped columns, tables)
+- **Confirmation**: Downgrade command requires explicit confirmation
+- **Testing**: Always test downgrade path before deploying
+- **Emergency Only**: Use downgrades only for critical rollbacks
+
+## Backward Compatibility
+
+### Pre-Alembic Databases
+
+Existing databases created before Alembic integration are automatically detected and stamped with revision `001`:
+
+1. Server detects no `alembic_version` table
+2. Checks if `refresh_tokens` table exists
+3. If yes, stamps database with `001` (no schema changes)
+4. Future updates use normal migration path
+
+### Migration Path
+
+```
+Pre-Alembic DB → Stamp(001) → Upgrade(002) → Upgrade(003) → ...
+New DB → Migrate(001) → Upgrade(002) → Upgrade(003) → ...
+```
+
+## Troubleshooting
+
+### Migration Fails
+
+```bash
+# Check current state
+uv run nextcloud-mcp-server db current -d /path/to/tokens.db
+
+# View migration history
+uv run nextcloud-mcp-server db history -d /path/to/tokens.db
+
+# Manually inspect database
+sqlite3 /path/to/tokens.db ".schema"
+```
+
+### Reset to Initial State
+
+**WARNING: This destroys all data!**
+
+```bash
+# Downgrade to base (empty database)
+uv run nextcloud-mcp-server db downgrade -d /path/to/tokens.db --revision base
+
+# Upgrade to latest
+uv run nextcloud-mcp-server db upgrade -d /path/to/tokens.db
+```
+
+### Corrupted Migration State
+
+If `alembic_version` table is corrupted:
+
+```bash
+# Manually fix via SQL
+sqlite3 /path/to/tokens.db
+> DELETE FROM alembic_version;
+> INSERT INTO alembic_version (version_num) VALUES ('001');
+> .quit
+
+# Verify and upgrade
+uv run nextcloud-mcp-server db current -d /path/to/tokens.db
+uv run nextcloud-mcp-server db upgrade -d /path/to/tokens.db
+```
+
+## CI/CD Integration
+
+### Pre-Deployment
+
+```bash
+# Run migrations in test environment
+export TOKEN_STORAGE_DB=/app/data/tokens.db
+uv run nextcloud-mcp-server db upgrade
+
+# Verify current version
+uv run nextcloud-mcp-server db current
+```
+
+### Docker Deployment
+
+Migrations run automatically on container startup via `RefreshTokenStorage.initialize()`.
+
+### Rollback Plan
+
+1. Stop application
+2. Backup database: `cp tokens.db tokens.db.backup`
+3. Downgrade: `uv run nextcloud-mcp-server db downgrade --revision XXX`
+4. Deploy previous application version
+5. Restart application
+
+## References
+
+- [Alembic Documentation](https://alembic.sqlalchemy.org/)
+- [SQLite ALTER TABLE Limitations](https://www.sqlite.org/lang_altertable.html)
+- [ADR-004: Progressive Consent](./ADR-004-progressive-consent.md) (migration 001)
@@ -243,7 +243,7 @@ If you see cardinality warnings:
 The observability stack integrates at multiple layers:

 1. **HTTP Layer**: `ObservabilityMiddleware` tracks all HTTP requests
-2. **MCP Layer**: Tools use `@trace_mcp_tool` for span creation
+2. **MCP Layer**: Tools use `@instrument_tool` for automatic metrics and trace span creation
 3. **Client Layer**: `BaseNextcloudClient` tracks all API calls
 4. **OAuth Layer**: Token operations are traced and metered
 5. **Background Tasks**: Vector sync operations emit metrics/traces
@@ -14,100 +14,10 @@ Before running the server:

 ## Quick Start

-Load your environment variables and start the server:
+Start the server using Docker:

 ```bash
-# Load environment variables from .env
-export $(grep -v '^#' .env | xargs)
-
-# Start the server
-uv run nextcloud-mcp-server
-```
-
-The server will start on `http://127.0.0.1:8000` by default.
-
---
-
-## Running Locally
-
-### Method 1: Using nextcloud-mcp-server CLI (Recommended)
-
-The CLI provides a simple interface with built-in defaults:
-
-#### OAuth Mode
-
-```bash
-# Auto-detected when NEXTCLOUD_USERNAME/PASSWORD not set
-uv run nextcloud-mcp-server
-
-# Explicitly force OAuth mode
-uv run nextcloud-mcp-server --oauth
-
-# OAuth with custom host and port
-uv run nextcloud-mcp-server --oauth --host 0.0.0.0 --port 8080
-
-# OAuth with pre-configured client
-uv run nextcloud-mcp-server --oauth \
-  --oauth-client-id abc123 \
-  --oauth-client-secret xyz789
-
-# OAuth with specific apps only
-uv run nextcloud-mcp-server --oauth \
-  --enable-app notes \
-  --enable-app calendar
-```
-
-#### BasicAuth Mode (Legacy)
-
-```bash
-# Auto-detected when NEXTCLOUD_USERNAME/PASSWORD are set
-uv run nextcloud-mcp-server
-
-# Explicitly force BasicAuth mode
-uv run nextcloud-mcp-server --no-oauth
-
-# BasicAuth with specific apps
-uv run nextcloud-mcp-server --no-oauth \
-  --enable-app notes \
-  --enable-app webdav
-```
-
-### Method 2: Using uvicorn
-
-For more control over server options (workers, reload, etc.):
-
-```bash
-# Load environment variables
-export $(grep -v '^#' .env | xargs)
-
-# Run with uvicorn
-uv run uvicorn nextcloud_mcp_server.app:get_app \
-  --factory \
-  --host 127.0.0.1 \
-  --port 8000 \
-  --reload  # Enable auto-reload for development
-```
-
-See all uvicorn options at [https://www.uvicorn.org/settings/](https://www.uvicorn.org/settings/)
-
-### Method 3: Using Python Module
-
-```bash
-# Load environment variables
-export $(grep -v '^#' .env | xargs)
-
-# Run as Python module
-python -m nextcloud_mcp_server.app --oauth --port 8000
-```
-
---
-
-## Running with Docker
-
-### Basic Docker Run
-
-```bash
-# OAuth mode
+# OAuth mode (recommended)
 docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth

@@ -116,11 +26,56 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
 ```

-### Docker with Persistent OAuth Storage
+The server will start on `http://127.0.0.1:8000` by default.
+
+---
+
+## Running with Docker
+
+### Basic Docker Run
+
+#### OAuth Mode (Recommended)

 ```bash
+# OAuth with auto-registration
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
+
+# OAuth with custom port
+docker run -p 127.0.0.1:8080:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
+
+# OAuth with pre-configured client
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  -e NEXTCLOUD_OIDC_CLIENT_ID=abc123 \
+  -e NEXTCLOUD_OIDC_CLIENT_SECRET=xyz789 \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
+
+# OAuth with specific apps only
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --enable-app notes --enable-app calendar
+```
+
+#### BasicAuth Mode (Legacy)
+
+```bash
+# BasicAuth (requires NEXTCLOUD_USERNAME/PASSWORD in .env)
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
+
+# BasicAuth with specific apps
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest \
+  --enable-app notes --enable-app webdav
+```
+
+### Docker with Persistent Token Storage
+
+```bash
+# Mount volume for persistent OAuth token storage
 docker run -p 127.0.0.1:8000:8000 --env-file .env \
-  -v $(pwd)/.oauth:/app/.oauth \
+  -v $(pwd)/data:/app/data \
  --rm ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
 ```

@@ -140,7 +95,7 @@ services:
    env_file:
      - .env
    volumes:
-      - ./oauth-storage:/app/.oauth
+      - ./data:/app/data  # Persistent token storage
    restart: unless-stopped
 ```

@@ -168,30 +123,39 @@ docker-compose down

 ```bash
 # Bind to all interfaces (accessible from network)
-uv run nextcloud-mcp-server --host 0.0.0.0 --port 8000
+docker run -p 0.0.0.0:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth

 # Bind to localhost only (default, more secure)
-uv run nextcloud-mcp-server --host 127.0.0.1 --port 8000
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth

-# Use a different port
-uv run nextcloud-mcp-server --port 8080
+# Use a different port (map host port 8080 to container port 8000)
+docker run -p 127.0.0.1:8080:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
 ```

-**Security Note:** Using `--host 0.0.0.0` exposes the server to your network. Only use this if you understand the security implications.
+**Security Note:** Binding to `0.0.0.0` exposes the server to your network. Only use this if you understand the security implications.

 ### Transport Protocols

 The server supports multiple MCP transport protocols:

 ```bash
-# Streamable HTTP (recommended)
-uv run nextcloud-mcp-server --transport streamable-http
+# Streamable HTTP (default, recommended)
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --transport streamable-http

-# SSE - Server-Sent Events (default, deprecated)
-uv run nextcloud-mcp-server --transport sse
+# SSE - Server-Sent Events (deprecated)
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --transport sse

 # HTTP
-uv run nextcloud-mcp-server --transport http
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --transport http
 ```

 > [!WARNING]
@@ -201,10 +165,14 @@ uv run nextcloud-mcp-server --transport http

 ```bash
 # Set log level (critical, error, warning, info, debug, trace)
-uv run nextcloud-mcp-server --log-level debug
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --log-level debug

 # Production: use warning or error
-uv run nextcloud-mcp-server --log-level warning
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --log-level warning
 ```

 ### Selective App Enablement
@@ -212,22 +180,26 @@ uv run nextcloud-mcp-server --log-level warning
 By default, all supported Nextcloud apps are enabled. You can enable specific apps only:

 ```bash
-# Available apps: notes, tables, webdav, calendar, contacts, deck
+# Available apps: notes, tables, webdav, calendar, contacts, cookbook, deck

 # Enable all apps (default)
-uv run nextcloud-mcp-server
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth

 # Enable only Notes
-uv run nextcloud-mcp-server --enable-app notes
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --enable-app notes

 # Enable multiple apps
-uv run nextcloud-mcp-server \
-  --enable-app notes \
-  --enable-app calendar \
-  --enable-app contacts
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --enable-app notes --enable-app calendar --enable-app contacts

 # Enable only WebDAV for file operations
-uv run nextcloud-mcp-server --enable-app webdav
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --enable-app webdav
 ```

 **Use cases:**
@@ -240,24 +212,68 @@ uv run nextcloud-mcp-server --enable-app webdav

 ## Development Mode

-For active development with auto-reload:
+### Running for Development
+
+For active development with auto-reload, mount your source code as a volume:

 ```bash
-# Using uvicorn with reload
-uv run uvicorn nextcloud_mcp_server.app:get_app \
-  --factory \
-  --reload \
-  --host 127.0.0.1 \
-  --port 8000 \
+# Development mode with source code mounted
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  -v $(pwd):/app \
+  -v $(pwd)/data:/app/data \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
  --log-level debug
 ```

-Or use the CLI with reload flag:
+For local development without Docker:

 ```bash
-uv run nextcloud-mcp-server --reload --log-level debug
+# Load environment variables
+export $(grep -v '^#' .env | xargs)
+
+# Run the server with auto-reload
+uv run nextcloud-mcp-server run --oauth --log-level debug
 ```

+### CLI Subcommands
+
+The `nextcloud-mcp-server` CLI has two main subcommands:
+
+1. **`run`** - Start the MCP server (default command in Docker)
+   ```bash
+   uv run nextcloud-mcp-server run --oauth --host 0.0.0.0 --port 8000
+   ```
+
+2. **`db`** - Database migration management (Alembic)
+   ```bash
+   # Show current migration revision
+   uv run nextcloud-mcp-server db current
+
+   # Upgrade to latest migration
+   uv run nextcloud-mcp-server db upgrade
+
+   # Show migration history
+   uv run nextcloud-mcp-server db history
+
+   # Create new migration (developers only)
+   uv run nextcloud-mcp-server db migrate "description of changes"
+   ```
+
+### Database Migrations
+
+Token storage uses **Alembic** for schema management:
+
+- **Automatic migrations**: Database is upgraded automatically on server startup
+- **Backward compatibility**: Pre-Alembic databases are automatically stamped with the initial revision
+- **Migration files**: Located in `alembic/versions/`
+- **For developers**: When changing the schema:
+  1. Create a migration: `uv run nextcloud-mcp-server db migrate "add new column"`
+  2. Edit the generated file in `alembic/versions/` to add SQL statements
+  3. Test upgrade: `uv run nextcloud-mcp-server db upgrade`
+  4. Test downgrade: `uv run nextcloud-mcp-server db downgrade`
+
+See [Database Migrations Guide](database-migrations.md) for detailed information.
+
 ---

 ## Connecting to the Server
@@ -266,15 +282,15 @@ uv run nextcloud-mcp-server --reload --log-level debug

 MCP Inspector is a browser-based tool for testing MCP servers:

-```bash
-# Start MCP Inspector
-uv run mcp dev
-
-# In the browser:
-# 1. Enter server URL: http://localhost:8000
-# 2. Complete OAuth flow (if using OAuth)
-# 3. Explore tools and resources
-```
+1. Start your MCP server using Docker (see above)
+2. Start MCP Inspector:
+   ```bash
+   npx @modelcontextprotocol/inspector
+   ```
+3. In the browser:
+   - Enter server URL: `http://localhost:8000`
+   - Complete OAuth flow (if using OAuth)
+   - Explore tools and resources

 ### Using MCP Clients

@@ -322,48 +338,13 @@ INFO     Initializing Nextcloud client with BasicAuth

 ### Running as a Background Service

-#### Using systemd (Linux)
-
-Create `/etc/systemd/system/nextcloud-mcp.service`:
-
-```ini
-[Unit]
-Description=Nextcloud MCP Server
-After=network.target
-
-[Service]
-Type=simple
-User=your-user
-WorkingDirectory=/path/to/nextcloud-mcp-server
-EnvironmentFile=/path/to/.env
-ExecStart=/path/to/uv run nextcloud-mcp-server --oauth
-Restart=on-failure
-RestartSec=10
-
-[Install]
-WantedBy=multi-user.target
-```
-
-Enable and start:
-
-```bash
-sudo systemctl daemon-reload
-sudo systemctl enable nextcloud-mcp
-sudo systemctl start nextcloud-mcp
-sudo systemctl status nextcloud-mcp
-```
-
-#### Using Docker Compose
-
-See [Docker Compose section](#docker-compose) above - includes `restart: unless-stopped`.
+Use Docker Compose with `restart: unless-stopped` (see [Docker Compose section](#docker-compose) above).

 ### Monitoring Logs

 ```bash
-# Local installation with systemd
-sudo journalctl -u nextcloud-mcp -f
-
-# Docker
+# Docker (find container name first)
+docker ps
 docker logs -f <container-name>

 # Docker Compose
@@ -374,35 +355,38 @@ docker-compose logs -f mcp

 ## Performance Tuning

-### Multiple Workers
-
-For production deployments with higher load:
-
-```bash
-# Using CLI (if supported)
-uv run nextcloud-mcp-server --workers 4
-
-# Using uvicorn
-uv run uvicorn nextcloud_mcp_server.app:get_app \
-  --factory \
-  --workers 4 \
-  --host 0.0.0.0 \
-  --port 8000
-```
-
 ### Production Settings

-```bash
-# Recommended production configuration
-uv run nextcloud-mcp-server \
-  --oauth \
-  --host 127.0.0.1 \
-  --port 8000 \
-  --log-level warning \
-  --transport streamable-http \
-  --workers 2
+For production deployments, use Docker Compose with the recommended settings:
+
+```yaml
+version: '3.8'
+
+services:
+  mcp:
+    image: ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
+    command: --oauth --log-level warning --transport streamable-http
+    ports:
+      - "127.0.0.1:8000:8000"
+    env_file:
+      - .env
+    volumes:
+      - ./data:/app/data
+    restart: unless-stopped
+    deploy:
+      resources:
+        limits:
+          cpus: '2'
+          memory: 1G
+        reservations:
+          cpus: '0.5'
+          memory: 512M
 ```

+### Scaling with Multiple Replicas
+
+For higher load, use Docker Swarm or Kubernetes. See the [Helm Chart](../helm/) for Kubernetes deployments.
+
 ---

 ## Troubleshooting
@@ -411,12 +395,18 @@ uv run nextcloud-mcp-server \

 Check logs for errors:
 ```bash
-uv run nextcloud-mcp-server --log-level debug
+# View container logs
+docker logs <container-name>
+
+# Or run with debug logging
+docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
+  ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
+  --log-level debug
 ```

 Common issues:
- Environment variables not loaded - See [Configuration](configuration.md#loading-environment-variables)
- Port already in use - Try a different port with `--port`
+- Environment variables not loaded - Check your `.env` file
+- Port already in use - Use a different host port (e.g., `-p 127.0.0.1:8080:8000`)
 - OAuth configuration errors - See [Troubleshooting](troubleshooting.md)

 ### Can't connect to server
@@ -5,7 +5,7 @@ This document explains the architecture of the semantic search feature in the Ne
 > [!IMPORTANT]
 > **Status: Experimental**
 > - Disabled by default (`VECTOR_SYNC_ENABLED=false`)
-> - Currently supports **Notes app only** (multi-app architecture ready, additional apps planned)
+> - Currently supports **Notes, Files (PDFs), News items, and Deck cards**
 > - Requires additional infrastructure (Qdrant vector database + Ollama embedding service)
 > - RAG answer generation requires MCP client sampling support

@@ -39,9 +39,9 @@ Semantic search enables:

 ### Current Support

- **Supported Apps**: Notes (fully implemented)
- **Planned Apps**: Calendar events, Calendar tasks, Deck cards, Files (with text extraction), Contacts
- **Architecture**: Multi-app plugin system ready, awaiting implementation
+- **Supported Apps**: Notes, Files (PDFs with text extraction), News items, Deck cards
+- **Planned Apps**: Calendar events, Calendar tasks, Contacts
+- **Architecture**: Multi-app plugin system ready for additional apps

 ## System Components

@@ -4,6 +4,146 @@ This guide covers common issues and solutions for the Nextcloud MCP server.

 > **OAuth-specific issues?** See the dedicated [OAuth Troubleshooting Guide](oauth-troubleshooting.md) for OAuth authentication problems, OIDC discovery issues, token validation failures, and more.

+> **Upgrading from v0.57.x?** See the [Configuration Migration Guide](configuration-migration-v2.md) for help with new variable names.
+
+## Configuration Issues (v0.58.0+)
+
+### Issue: Deprecation warning for VECTOR_SYNC_ENABLED
+
+**Symptom:**
+```
+WARNING: VECTOR_SYNC_ENABLED is deprecated. Please use ENABLE_SEMANTIC_SEARCH instead.
+```
+
+**Cause:** You're using the old variable name from v0.57.x.
+
+**Solution:**
+```bash
+# In your .env file, replace:
+VECTOR_SYNC_ENABLED=true
+
+# With:
+ENABLE_SEMANTIC_SEARCH=true
+```
+
+See [Configuration Migration Guide](configuration-migration-v2.md) for complete migration instructions.
+
+---
+
+### Issue: Deprecation warning for ENABLE_OFFLINE_ACCESS
+
+**Symptom:**
+```
+WARNING: ENABLE_OFFLINE_ACCESS is deprecated. Please use ENABLE_BACKGROUND_OPERATIONS instead.
+```
+
+**Cause:** You're using the old variable name from v0.57.x.
+
+**Solution:**
+
+**If you have semantic search enabled:**
+```bash
+# In multi-user modes, you can remove ENABLE_OFFLINE_ACCESS entirely!
+# ENABLE_SEMANTIC_SEARCH automatically enables background operations
+
+# Before (v0.57.x):
+ENABLE_OFFLINE_ACCESS=true
+VECTOR_SYNC_ENABLED=true
+
+# After (v0.58.0+):
+ENABLE_SEMANTIC_SEARCH=true  # This is all you need!
+```
+
+**If you only want background operations (no semantic search):**
+```bash
+# Replace:
+ENABLE_OFFLINE_ACCESS=true
+
+# With:
+ENABLE_BACKGROUND_OPERATIONS=true
+```
+
+---
+
+### Issue: "Invalid MCP_DEPLOYMENT_MODE"
+
+**Symptom:**
+```
+ValueError: Invalid MCP_DEPLOYMENT_MODE: 'oauth'. Valid values: single_user_basic, multi_user_basic, oauth_single_audience, oauth_token_exchange, smithery
+```
+
+**Cause:** Invalid value for `MCP_DEPLOYMENT_MODE`.
+
+**Solution:**
+Use one of the valid mode values:
+```bash
+# Correct values:
+MCP_DEPLOYMENT_MODE=single_user_basic          # Single-user with username/password
+MCP_DEPLOYMENT_MODE=multi_user_basic           # Multi-user BasicAuth
+MCP_DEPLOYMENT_MODE=oauth_single_audience      # OAuth (recommended)
+MCP_DEPLOYMENT_MODE=oauth_token_exchange       # OAuth with token exchange
+MCP_DEPLOYMENT_MODE=smithery                   # Smithery deployment
+```
+
+Or remove `MCP_DEPLOYMENT_MODE` to use automatic detection.
+
+---
+
+### Issue: Missing TOKEN_ENCRYPTION_KEY when semantic search enabled
+
+**Symptom:**
+```
+Error: [oauth_single_audience] TOKEN_ENCRYPTION_KEY is required when ENABLE_SEMANTIC_SEARCH is enabled
+```
+
+**Cause:** In multi-user modes, semantic search automatically enables background operations, which require encrypted token storage.
+
+**Solution:**
+Generate an encryption key and add required token storage configuration:
+
+```bash
+# Generate encryption key
+python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+
+# Add to .env:
+TOKEN_ENCRYPTION_KEY=<generated-key>
+TOKEN_STORAGE_DB=/app/data/tokens.db
+NEXTCLOUD_OIDC_CLIENT_ID=your-client-id       # Required for app password retrieval
+NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
+```
+
+**Why this happens:**
+- v0.58.0+ automatically enables background operations when `ENABLE_SEMANTIC_SEARCH=true` in multi-user modes
+- Background operations need encrypted refresh token storage
+- This simplifies configuration but requires the encryption infrastructure
+
+See [Configuration Guide - Semantic Search](configuration.md#semantic-search-configuration-optional) for details.
+
+---
+
+### Issue: Both old and new variable names set
+
+**Symptom:**
+```
+WARNING: Both ENABLE_SEMANTIC_SEARCH and VECTOR_SYNC_ENABLED are set. Using ENABLE_SEMANTIC_SEARCH.
+```
+
+**Cause:** You have both the old and new variable names in your configuration.
+
+**Solution:**
+Remove the old variable name:
+```bash
+# Remove this line:
+VECTOR_SYNC_ENABLED=true
+
+# Keep this line:
+ENABLE_SEMANTIC_SEARCH=true
+```
+
+The server will use the new name and ignore the old one, but it's cleaner to remove the old variable entirely.
+
+---
+
 ## OAuth Issues (Quick Reference)

 ### Issue: "OAuth mode requires NEXTCLOUD_HOST environment variable"
@@ -0,0 +1,93 @@
+# Vector Sync UI Guide
+
+This guide covers the browser-based interface for the Nextcloud MCP Server's semantic search and vector synchronization features.
+
+## Overview
+
+The Vector Sync UI (`/app`) provides an interactive interface to test semantic search queries and visualize results from your Nextcloud documents. It exposes the same retrieval capabilities that LLMs use in Retrieval-Augmented Generation (RAG) workflows, powered by Alpine.js for reactive state, htmx for dynamic updates, and Plotly.js for 3D visualization.
+
+**Supported Apps**: Notes, Files (text/PDF), Calendar (events/tasks), Contacts (CardDAV), and Deck are indexed and searchable.
+
+## Accessing the UI
+
+Navigate to `/app` after authentication:
+- **BasicAuth mode**: `http://localhost:8000/app` (uses credentials from environment)
+- **OAuth mode**: `http://localhost:8000/app` (redirects to login if not authenticated)
+
+## Tabs
+
+### Welcome Page
+
+Landing page that introduces semantic search and RAG workflows. Shows authentication status, explains how vector embeddings work, and provides feature navigation. Adapts content based on whether `VECTOR_SYNC_ENABLED=true`.
+
+### User Info
+
+Displays authentication details and session information:
+- **BasicAuth**: Username, mode badge, Nextcloud host
+- **OAuth**: Username, session ID (truncated), background access status, IdP profile, revocation option
+
+### Vector Sync Status
+
+Real-time monitoring of document indexing:
+- **Indexed Documents**: Total chunks stored in Qdrant vector database (immediately searchable)
+- **Pending Documents**: Queue awaiting embedding processing
+- **Status**: "✓ Idle" (green) when up-to-date, "⟳ Syncing" (orange) during processing
+
+Auto-refreshes every 10 seconds via htmx. Check this tab after adding content to verify indexing completion.
+
+### Vector Visualization
+
+Interactive search interface with 3D PCA plot of semantic space.
+
+**Search Controls**:
+- **Query**: Natural language search (e.g., "health benefits of coffee")
+- **Algorithm**: Semantic (Dense) for pure vector search, or BM25 Hybrid (default) combining vectors + keywords
+- **Fusion** (Hybrid only): RRF (Reciprocal Rank Fusion) or DBSF (Distribution-Based Score Fusion)
+- **Advanced**: Filter by document type, adjust score threshold (0.0-1.0), set result limit (max 100)
+
+**3D Visualization**:
+
+The plot uses Principal Component Analysis (PCA) to reduce 768-dimensional embeddings to 3D. Documents are positioned by semantic similarity with the query point shown in red. Point size and opacity indicate relevance, and the Viridis color scale shows relative scores (yellow = highest match).
+
+**Critical Fix**: Vectors are L2-normalized before PCA to match Qdrant's cosine distance, ensuring query points position accurately near similar documents. Without normalization, magnitude differences cause misleading spatial separation.
+
+**Results List**:
+
+Each result shows document title (clickable link to Nextcloud), excerpt, raw score, relative percentage, and document type. Click "Show Chunk" to view the matched text segment with surrounding context (up to 500 characters before/after).
+
+## Configuration
+
+**Required**:
+```bash
+VECTOR_SYNC_ENABLED=true
+```
+
+**Optional** (for browser-accessible links):
+```bash
+NEXTCLOUD_PUBLIC_ISSUER_URL=https://your-public-nextcloud-url.com
+```
+
+**Admin Access**: Webhooks tab only visible to Nextcloud admins (verified via Provisioning API).
+
+## Use Cases
+
+**Testing Search Queries**: Preview results before they reach LLMs in RAG workflows. Compare semantic vs. hybrid algorithms, verify relevance scores, and validate that correct documents are retrieved. Use chunk context to see exactly which text segments match and why unexpected documents appear.
+
+**Monitoring Indexing**: Track real-time progress after creating or modifying documents. Check if the queue is backing up (high pending count) or confirm the system is idle after bulk imports. Verify documents become searchable immediately after indexing completes.
+
+**Algorithm Comparison**: Pure semantic search excels at conceptual queries and synonyms. BM25 hybrid combines semantic understanding with precise keyword matching for better accuracy on specific terms. Experiment with RRF vs. DBSF fusion for different score distributions.
+
+## Troubleshooting
+
+**Vector Sync Tab Not Visible**: Set `VECTOR_SYNC_ENABLED=true` and restart the server.
+
+**No Search Results**: Check Vector Sync Status to confirm documents are indexed (not just pending). Try broader queries or lower the score threshold in Advanced options. Initial indexing may take time depending on document volume.
+
+**Links to Nextcloud Apps Not Working**: Set `NEXTCLOUD_PUBLIC_ISSUER_URL` to your browser-accessible Nextcloud URL for correct link generation.
+
+## Related Documentation
+
+- [Configuration Guide](../configuration.md) - Environment variables and settings
+- [Authentication Modes](../authentication.md) - BasicAuth vs OAuth setup
+- [Installation Guide](../installation.md) - Getting started
+- [ADR-008: MCP Sampling for RAG](../ADR-008-mcp-sampling-for-rag.md) - Technical details on RAG workflows
@@ -0,0 +1,339 @@
+# Webhook Management Guide
+
+This guide explains how to enable and disable webhooks for vector sync in each MCP server deployment mode. Webhooks enable near-real-time synchronization of content changes to the vector database, complementing the default polling-based sync.
+
+**Related ADRs:**
+- ADR-010: Webhook-Based Vector Sync
+- ADR-020: Deployment Modes and Configuration Validation
+
+## Prerequisites
+
+Before enabling webhooks, ensure:
+
+1. **Nextcloud 30+** with `webhook_listeners` app enabled
+2. **[Astrolabe app](https://github.com/cbcoutinho/astrolabe)** installed in Nextcloud (provides settings UI and credentials API)
+3. **MCP server** accessible from Nextcloud via HTTP(S)
+4. **Vector sync enabled** on the MCP server
+
+## Webhook Architecture Overview
+
+The webhook system has two components:
+
+1. **Webhook Registration** - Configuring Nextcloud to send change notifications to the MCP server
+2. **Background Sync Credentials** - Allowing the MCP server to access Nextcloud APIs on behalf of users
+
+Both must be configured for webhooks to function properly.
+
+## Deployment Mode Specifics
+
+### 1. Single-User BasicAuth
+
+**Configuration:**
+```bash
+NEXTCLOUD_HOST=http://localhost:8080
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+VECTOR_SYNC_ENABLED=true
+```
+
+**Enable Webhooks:**
+1. Register webhooks using occ commands (requires Nextcloud admin):
+   ```bash
+   # Enable webhook_listeners app
+   php occ app:enable webhook_listeners
+
+   # Register webhooks for vector sync
+   php occ webhook_listeners:add \
+     --event "OCP\Files\Events\Node\NodeCreatedEvent" \
+     --uri "http://mcp-server:8000/webhooks/nextcloud" \
+     --method POST
+
+   # Repeat for other events (see Event Types below)
+   ```
+
+2. Optionally reduce polling frequency:
+   ```bash
+   VECTOR_SYNC_SCAN_INTERVAL=86400  # 24 hours
+   ```
+
+**Disable Webhooks:**
+```bash
+# List registered webhooks
+php occ webhook_listeners:list
+
+# Remove specific webhook by ID
+php occ webhook_listeners:remove <webhook-id>
+```
+
+**Notes:**
+- Simplest mode - admin credentials used for all operations
+- No per-user provisioning required
+- Background sync runs as the configured admin user
+
+---
+
+### 2. Multi-User BasicAuth Pass-Through
+
+**Configuration:**
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+ENABLE_MULTI_USER_BASIC_AUTH=true
+ENABLE_BACKGROUND_OPERATIONS=true
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/app/data/tokens.db
+VECTOR_SYNC_ENABLED=true
+# OAuth client for Astrolabe API access
+NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
+NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
+```
+
+**Credential Architecture:**
+This mode uses **two separate credential mechanisms**:
+
+1. **OAuth Session** (for management API access, including webhooks):
+   - Obtained via browser OAuth flow (`/oauth/login`)
+   - Stores refresh token in MCP server's `tokens.db`
+   - Used for webhook registration/management APIs
+
+2. **App Password** (for background sync):
+   - Generated in Nextcloud Security settings
+   - Stored encrypted in Nextcloud's `oc_preferences` via Astrolabe
+   - Used by background scanners to access Nextcloud APIs
+
+**Enable Webhooks:**
+
+#### Step 1: Complete OAuth Login (for Management API)
+Users must authorize the MCP server to access their Nextcloud:
+
+1. Navigate to **Nextcloud Settings → Astrolabe** (Personal settings)
+2. Click **"Authorize via OAuth"** under "Option 1"
+3. Complete OAuth consent flow
+4. Verify the page shows "Background Sync Access: Active"
+
+#### Step 2: Configure App Password (for Background Sync)
+Since OAuth refresh tokens have short expiry, users should also configure an app password:
+
+1. Navigate to **Nextcloud Settings → Security**
+2. Generate a new app password (name it "Astrolabe" or "MCP Server")
+3. Return to **Nextcloud Settings → Astrolabe**
+4. Under "Option 2: App Password", paste the app password
+5. Click **Save**
+
+#### Step 3: Register Webhooks (Admin)
+Same as Single-User BasicAuth:
+```bash
+php occ webhook_listeners:add \
+  --event "OCP\Files\Events\Node\NodeCreatedEvent" \
+  --uri "http://mcp-server:8003/webhooks/nextcloud" \
+  --method POST
+```
+
+**Disable Webhooks:**
+
+*Per-User:*
+1. Navigate to **Nextcloud Settings → Astrolabe**
+2. Click **"Revoke Access"** (for OAuth tokens) or **"Revoke Access"** (for app password)
+
+*System-Wide:*
+```bash
+php occ webhook_listeners:remove <webhook-id>
+```
+
+**Troubleshooting:**
+
+If OAuth login fails with "Access forbidden - Your client is not authorized":
+1. Check if OAuth client is registered:
+   ```sql
+   SELECT id, name, client_identifier FROM oc_oidc_clients
+   WHERE dcr = 1 ORDER BY id DESC LIMIT 5;
+   ```
+2. Restart MCP server to trigger DCR re-registration
+3. Verify `NEXTCLOUD_OIDC_CLIENT_ID` and `NEXTCLOUD_OIDC_CLIENT_SECRET` are set
+
+If background sync fails with "User no longer provisioned":
+1. Verify app password is stored:
+   ```sql
+   SELECT userid, configkey FROM oc_preferences
+   WHERE appid = 'astrolabe' AND userid = 'username';
+   ```
+2. Ensure user completed **both** OAuth login AND app password setup
+
+---
+
+### 3. OAuth Single-Audience (Default OAuth Mode)
+
+**Configuration:**
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+# No NEXTCLOUD_USERNAME/PASSWORD
+ENABLE_BACKGROUND_OPERATIONS=true
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/app/data/tokens.db
+VECTOR_SYNC_ENABLED=true
+```
+
+**Enable Webhooks:**
+
+#### Step 1: User Provisioning
+Users authorize via OAuth with `offline_access` scope:
+
+1. MCP client initiates OAuth flow
+2. User consents to requested scopes including `offline_access`
+3. MCP server stores refresh token for background operations
+
+Alternatively, via Astrolabe UI:
+1. Navigate to **Nextcloud Settings → Astrolabe**
+2. Click **"Authorize via OAuth"**
+3. Complete consent flow
+
+#### Step 2: Register Webhooks (Admin)
+```bash
+php occ webhook_listeners:add \
+  --event "OCP\Files\Events\Node\NodeCreatedEvent" \
+  --uri "http://mcp-server:8001/webhooks/nextcloud" \
+  --method POST
+```
+
+**Disable Webhooks:**
+
+*Per-User:*
+- Via Astrolabe UI: Click "Disable Indexing" or "Disconnect"
+- Via MCP tool: Use `revoke_nextcloud_access` if available
+
+*System-Wide:*
+```bash
+php occ webhook_listeners:remove <webhook-id>
+```
+
+---
+
+### 4. OAuth Token Exchange (RFC 8693)
+
+**Configuration:**
+```bash
+NEXTCLOUD_HOST=http://nextcloud.example.com
+ENABLE_TOKEN_EXCHANGE=true
+ENABLE_BACKGROUND_OPERATIONS=true
+TOKEN_ENCRYPTION_KEY=<key>
+TOKEN_STORAGE_DB=/app/data/tokens.db
+VECTOR_SYNC_ENABLED=true
+```
+
+**Enable/Disable Webhooks:**
+Same process as OAuth Single-Audience. The token exchange happens transparently when the MCP server accesses Nextcloud APIs.
+
+---
+
+### 5. Smithery Stateless
+
+**Configuration:**
+- Configuration from session URL params
+- `VECTOR_SYNC_ENABLED=false` (required)
+
+**Webhooks:**
+**Not supported.** This mode is stateless with no persistent storage or background operations.
+
+---
+
+## Webhook Event Types
+
+Register these webhook events for full vector sync coverage:
+
+### File/Note Events
+```bash
+# Use BeforeNodeDeletedEvent for deletions (includes node.id)
+php occ webhook_listeners:add --event "OCP\Files\Events\Node\NodeCreatedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+php occ webhook_listeners:add --event "OCP\Files\Events\Node\NodeWrittenEvent" --uri "$MCP_URL/webhooks/nextcloud"
+php occ webhook_listeners:add --event "OCP\Files\Events\Node\BeforeNodeDeletedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+```
+
+### Calendar Events
+```bash
+php occ webhook_listeners:add --event "OCP\Calendar\Events\CalendarObjectCreatedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+php occ webhook_listeners:add --event "OCP\Calendar\Events\CalendarObjectUpdatedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+php occ webhook_listeners:add --event "OCP\Calendar\Events\CalendarObjectDeletedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+```
+
+### Tables Events
+```bash
+php occ webhook_listeners:add --event "OCA\Tables\Event\RowAddedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+php occ webhook_listeners:add --event "OCA\Tables\Event\RowUpdatedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+php occ webhook_listeners:add --event "OCA\Tables\Event\RowDeletedEvent" --uri "$MCP_URL/webhooks/nextcloud"
+```
+
+## Security Considerations
+
+### Webhook Authentication
+Configure `WEBHOOK_SECRET` to require authentication for incoming webhooks:
+
+```bash
+# MCP Server
+WEBHOOK_SECRET=<generate-random-secret>
+
+# Nextcloud webhook registration
+php occ webhook_listeners:add \
+  --event "..." \
+  --uri "$MCP_URL/webhooks/nextcloud" \
+  --header "Authorization: Bearer <secret>"
+```
+
+### Token Storage
+- Refresh tokens and app passwords are encrypted using `TOKEN_ENCRYPTION_KEY`
+- Store the key securely (environment variable, secrets manager)
+- Different users have isolated credential storage
+
+## Monitoring
+
+### MCP Server Logs
+```bash
+# Docker
+docker compose logs mcp-multi-user-basic | grep -i webhook
+
+# Key log messages
+# - "Queued document from webhook: ..." - Success
+# - "Webhook authentication failed" - Auth error
+# - "User X no longer provisioned" - Missing credentials
+```
+
+### Nextcloud Logs
+```bash
+docker compose exec app cat /var/www/html/data/nextcloud.log | \
+  jq 'select(.message | contains("webhook"))' | tail
+```
+
+### Database Checks
+```sql
+-- Check registered webhooks
+SELECT * FROM oc_webhook_listeners;
+
+-- Check OAuth clients
+SELECT id, name, token_type FROM oc_oidc_clients WHERE dcr = 1;
+
+-- Check user credentials stored by Astrolabe app
+SELECT userid, configkey FROM oc_preferences WHERE appid = 'astrolabe';
+```
+
+## Common Issues
+
+### "Access forbidden - Your client is not authorized to connect"
+**Cause:** OAuth client registration expired or not present in Nextcloud
+**Fix:** Restart MCP server to trigger DCR re-registration
+
+### "User X no longer provisioned, stopping scanner"
+**Cause:** Background sync credentials missing or expired
+**Fix:** User must complete credential provisioning (see mode-specific steps)
+
+### "Failed to fetch" in browser console during OAuth
+**Cause:** Network issue between browser and MCP server callback endpoint
+**Fix:** Verify MCP server is accessible at the configured `NEXTCLOUD_MCP_SERVER_URL`
+
+### Webhooks not firing
+**Causes:**
+1. `webhook_listeners` app not enabled
+2. Webhook not registered for the event type
+3. Background job workers not running
+**Fix:**
+```bash
+php occ app:enable webhook_listeners
+php occ background:cron  # or configure systemd cron
+```
@@ -1,198 +1,249 @@
-# Nextcloud Instance
+# ============================================
+# DEPLOYMENT MODE SELECTION
+# ============================================
+# Optional: Explicitly declare deployment mode (ADR-021)
+# If not set, mode is auto-detected from other settings
+# Valid values: single_user_basic, multi_user_basic, oauth_single_audience,
+#               oauth_token_exchange, smithery
+#
+# Recommendation: Set this for clarity and to catch configuration errors early
+#MCP_DEPLOYMENT_MODE=oauth_single_audience
+
+# ============================================
+# COMMON SETTINGS (Required for all modes)
+# ============================================
+# Your Nextcloud instance URL (without trailing slash)
 NEXTCLOUD_HOST=

-# ===== AUTHENTICATION MODE =====
-# Choose ONE of the following:
-
-# Option 1: OAuth2/OIDC (RECOMMENDED - More Secure)
-# - Requires Nextcloud OIDC app installed and configured
-# - Admin must enable "Dynamic Client Registration" in OIDC app settings
-# - Leave NEXTCLOUD_USERNAME and NEXTCLOUD_PASSWORD empty to use OAuth mode
-# - OAuth client credentials are stored encrypted in SQLite (TOKEN_STORAGE_DB)
-# - Optional: Pre-register client and provide credentials (otherwise auto-registers)
-NEXTCLOUD_OIDC_CLIENT_ID=
-NEXTCLOUD_OIDC_CLIENT_SECRET=
-NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
-
-# OAuth Storage Configuration (SQLite storage for OAuth clients and refresh tokens)
-# TOKEN_ENCRYPTION_KEY: Required for encrypting OAuth client secrets and refresh tokens
-# Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
-#TOKEN_ENCRYPTION_KEY=
-# TOKEN_STORAGE_DB: Path to SQLite database (default: /app/data/tokens.db)
-#TOKEN_STORAGE_DB=/app/data/tokens.db
-
-# ===== ADR-004 PROGRESSIVE CONSENT CONFIGURATION =====
-# Enable Progressive Consent mode (dual OAuth flows)
-# When enabled: Flow 1 for client auth, Flow 2 for Nextcloud resource access
-# When disabled: Uses existing hybrid flow (backward compatible)
-
-# MCP Server OAuth Client Configuration
-# The MCP server's own OAuth client credentials for Flow 2
-# If not set, will use dynamic client registration
-#MCP_SERVER_CLIENT_ID=
-#MCP_SERVER_CLIENT_SECRET=
-
-# Allowed MCP Client IDs (comma-separated list)
-# Client IDs that are allowed to authenticate in Flow 1
-# Examples: claude-desktop,continue-dev,zed-editor
-#ALLOWED_MCP_CLIENTS=claude-desktop,continue-dev,zed-editor
-
-# Token cache configuration for Token Broker Service
-# Cache TTL in seconds (default: 300 = 5 minutes)
-#TOKEN_CACHE_TTL=300
-# Early refresh threshold in seconds (default: 30)
-#TOKEN_CACHE_EARLY_REFRESH=30
-
-# Option 2: Basic Authentication (LEGACY - Less Secure)
-# - Requires username and password
-# - Credentials stored in environment variables
-# - Use only for backward compatibility or if OAuth unavailable
-# - If these are set, OAuth mode is disabled
+# ============================================
+# SINGLE-USER BASICAUTH MODE
+# ============================================
+# Simplest deployment - one user, credentials in environment
+# Use for: Personal instances, local development, testing
+#
+# Required:
 NEXTCLOUD_USERNAME=
 NEXTCLOUD_PASSWORD=
+#
+# Optional features (semantic search, document processing):
+# See "Optional Features" section below

 # ============================================
-# Document Processing Configuration
+# MULTI-USER BASICAUTH MODE
 # ============================================
-# Enable document processing (PDF, DOCX, images, etc.)
-# Set to false to disable all document processing
-ENABLE_DOCUMENT_PROCESSING=false
-
-# Default processor to use when multiple are available
-# Options: unstructured, tesseract, custom
-DOCUMENT_PROCESSOR=unstructured
+# Users provide credentials in request headers (pass-through)
+# Use for: Multi-user without OAuth, simple shared deployments
+#
+# Required:
+#ENABLE_MULTI_USER_BASIC_AUTH=true
+#
+# Optional - Background Operations (for semantic search, future features):
+# Enable background token storage using app passwords (via Astrolabe)
+# Required for semantic search in multi-user mode
+# Note: ENABLE_SEMANTIC_SEARCH automatically enables this in multi-user modes
+#ENABLE_BACKGROUND_OPERATIONS=true
+#NEXTCLOUD_OIDC_CLIENT_ID=
+#NEXTCLOUD_OIDC_CLIENT_SECRET=
+#TOKEN_ENCRYPTION_KEY=
+#TOKEN_STORAGE_DB=/app/data/tokens.db
+#
+# Optional features (semantic search, document processing):
+# See "Optional Features" section below

 # ============================================
-# Unstructured.io Processor
+# OAUTH SINGLE-AUDIENCE MODE (Recommended)
 # ============================================
-# Enable Unstructured processor (requires unstructured service in docker-compose)
-# This is a cloud-based/API processor supporting many document types
-ENABLE_UNSTRUCTURED=false
-
-# Unstructured API endpoint
-UNSTRUCTURED_API_URL=http://unstructured:8000
-
-# Request timeout in seconds (default: 120)
-# OCR operations can take 30-120 seconds for large documents
-UNSTRUCTURED_TIMEOUT=120
-
-# Parsing strategy: auto, fast, hi_res
-# - auto: Automatically choose based on document type
-# - fast: Fast parsing without OCR
-# - hi_res: High-resolution with OCR (slowest, most accurate)
-UNSTRUCTURED_STRATEGY=auto
-
-# OCR languages (comma-separated ISO 639-3 codes)
-# Common: eng=English, deu=German, fra=French, spa=Spanish
-UNSTRUCTURED_LANGUAGES=eng,deu
-
-# Progress reporting interval in seconds (default: 10)
-# During long-running OCR operations, progress notifications are sent to the MCP client
-# at this interval to prevent timeouts and provide status updates
-PROGRESS_INTERVAL=10
+# Multi-user OAuth with single-audience tokens
+# Use for: Multi-user production deployments, enhanced security
+# Tokens work for both MCP server and Nextcloud APIs (pass-through)
+#
+# Required: None (uses Dynamic Client Registration if credentials not provided)
+#
+# Optional - Pre-registered OAuth Client:
+# If you pre-register the client instead of using DCR:
+#NEXTCLOUD_OIDC_CLIENT_ID=
+#NEXTCLOUD_OIDC_CLIENT_SECRET=
+#
+# Optional - Background Operations (for semantic search, future features):
+# Enable refresh token storage for offline access
+# Note: ENABLE_SEMANTIC_SEARCH automatically enables this in multi-user modes
+#ENABLE_BACKGROUND_OPERATIONS=true
+#TOKEN_ENCRYPTION_KEY=
+#TOKEN_STORAGE_DB=/app/data/tokens.db
+#
+# Optional - Custom OIDC Discovery:
+# Auto-detected from NEXTCLOUD_HOST if not set
+#NEXTCLOUD_OIDC_DISCOVERY_URL=
+#
+# Optional - Custom Scopes:
+# Default: openid profile email offline_access notes:* calendar:* contacts:* tables:* webdav:* deck:* cookbook:*
+#NEXTCLOUD_OIDC_SCOPES=openid profile email notes:* calendar:*
+#
+# MCP Server URL (for OAuth redirects):
+#NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
+#
+# Optional features (semantic search, document processing):
+# See "Optional Features" section below

 # ============================================
-# Tesseract Processor (Local OCR)
+# OAUTH TOKEN EXCHANGE MODE (Advanced)
 # ============================================
-# Enable Tesseract processor (requires tesseract binary installed)
-# This is a local, lightweight OCR solution for images only
-ENABLE_TESSERACT=false
-
-# Path to tesseract executable (optional, auto-detected if in PATH)
-#TESSERACT_CMD=/usr/bin/tesseract
-
-# OCR language (e.g., eng, deu, eng+deu for multiple)
-TESSERACT_LANG=eng
+# Multi-user OAuth with RFC 8693 token exchange
+# Use for: Advanced deployments requiring separate MCP and Nextcloud tokens
+# MCP tokens are separate from Nextcloud tokens
+#
+# Required:
+#ENABLE_TOKEN_EXCHANGE=true
+#
+# Optional - Pre-registered OAuth Client:
+# If you pre-register the client instead of using DCR:
+#NEXTCLOUD_OIDC_CLIENT_ID=
+#NEXTCLOUD_OIDC_CLIENT_SECRET=
+#
+# Optional - Token Exchange Configuration:
+# Cache TTL in seconds (default: 300 = 5 minutes)
+#TOKEN_EXCHANGE_CACHE_TTL=300
+#
+# Optional - Background Operations:
+# Note: ENABLE_SEMANTIC_SEARCH automatically enables this in multi-user modes
+#ENABLE_BACKGROUND_OPERATIONS=true
+#TOKEN_ENCRYPTION_KEY=
+#TOKEN_STORAGE_DB=/app/data/tokens.db
+#
+# Optional - Custom OIDC Discovery:
+#NEXTCLOUD_OIDC_DISCOVERY_URL=
+#
+# MCP Server URL (for OAuth redirects):
+#NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
+#
+# Optional features (semantic search, document processing):
+# See "Optional Features" section below

 # ============================================
-# Custom Processor (Your own API)
+# SMITHERY STATELESS MODE
 # ============================================
-# Enable custom document processor via HTTP API
-ENABLE_CUSTOM_PROCESSOR=false
-
-# Unique name for your processor
-#CUSTOM_PROCESSOR_NAME=my_ocr
-
-# Your custom processor API endpoint
-#CUSTOM_PROCESSOR_URL=http://localhost:9000/process
-
-# Optional API key for authentication
-#CUSTOM_PROCESSOR_API_KEY=your-api-key-here
-
-# Request timeout in seconds
-#CUSTOM_PROCESSOR_TIMEOUT=60
-
-# Comma-separated MIME types your processor supports
-#CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg,image/png
+# Stateless multi-tenant deployment for Smithery platform
+# Configuration comes from session URL parameters
+# No persistent storage, no OAuth, no vector sync
+#
+# Required: None (all config from session URL)
+# This mode is activated automatically when deployed to Smithery

 # ============================================
-# Semantic Search & Vector Sync Configuration
+# OPTIONAL FEATURES (All Deployment Modes)
 # ============================================
-# EXPERIMENTAL: Semantic search for Notes app (multi-app support planned)
-# Requires: Qdrant vector database + Ollama embedding service
-# Disabled by default

-# Enable background vector indexing
-VECTOR_SYNC_ENABLED=false
+# ===== SEMANTIC SEARCH =====
+# AI-powered semantic search across Nextcloud content
+# Requires: Qdrant vector database + embedding provider (Ollama, Bedrock, or Simple fallback)
+#
+# Enable semantic search:
+#ENABLE_SEMANTIC_SEARCH=true
+#
+# Note for Multi-User Modes:
+# ENABLE_SEMANTIC_SEARCH automatically enables background operations when needed
+# No need to set ENABLE_BACKGROUND_OPERATIONS separately
+# The server will automatically request refresh tokens and store them encrypted
+#
+# Vector Database - Choose ONE mode:
+# 1. In-memory (default): Set neither QDRANT_URL nor QDRANT_LOCATION
+# 2. Persistent local: Set QDRANT_LOCATION=/path/to/data
+# 3. Network: Set QDRANT_URL=http://qdrant:6333
+#
+#QDRANT_URL=http://qdrant:6333
+#QDRANT_LOCATION=:memory:
+#QDRANT_API_KEY=
+#QDRANT_COLLECTION=nextcloud_content
+#
+# Embedding Provider - Choose ONE:
+# 1. Ollama (recommended for local deployment):
+#OLLAMA_BASE_URL=http://ollama:11434
+#OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+#OLLAMA_VERIFY_SSL=true
+#
+# 2. Amazon Bedrock (for AWS deployments):
+#AWS_REGION=us-east-1
+#BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+# Optional: AWS credentials (uses credential chain if not set)
+#AWS_ACCESS_KEY_ID=
+#AWS_SECRET_ACCESS_KEY=
+#
+# 3. Simple (automatic fallback, no configuration needed)
+# Uses basic in-memory embeddings if no provider configured
+#
+# Document Chunking:
+# Configure how documents are split before embedding
+#DOCUMENT_CHUNK_SIZE=512
+#DOCUMENT_CHUNK_OVERLAP=50

+# ===== SEMANTIC SEARCH TUNING =====
+# Advanced parameters for vector sync background operations
+# Only modify if you understand the implications
+#
 # Document scan interval in seconds (default: 300 = 5 minutes)
-# How often to check for new/updated documents
 #VECTOR_SYNC_SCAN_INTERVAL=300
-
+#
 # Concurrent indexing workers (default: 3)
-# Number of parallel workers for embedding generation
 #VECTOR_SYNC_PROCESSOR_WORKERS=3
-
+#
 # Max queued documents (default: 10000)
-# Maximum documents waiting to be processed
 #VECTOR_SYNC_QUEUE_MAX_SIZE=10000

-# ============================================
-# Qdrant Vector Database Configuration
-# ============================================
-# Choose ONE of three modes:
-# 1. In-memory mode (default): Set neither QDRANT_URL nor QDRANT_LOCATION
-# 2. Persistent local: Set QDRANT_LOCATION=/path/to/data
-# 3. Network mode: Set QDRANT_URL=http://qdrant:6333
+# ===== DOCUMENT PROCESSING =====
+# Extract text from PDFs, images, DOCX, etc. for semantic search
+# Disabled by default
+#
+#ENABLE_DOCUMENT_PROCESSING=false
+#DOCUMENT_PROCESSOR=unstructured
+#
+# Unstructured.io Processor (recommended):
+#ENABLE_UNSTRUCTURED=false
+#UNSTRUCTURED_API_URL=http://unstructured:8000
+#UNSTRUCTURED_TIMEOUT=120
+#UNSTRUCTURED_STRATEGY=auto
+#UNSTRUCTURED_LANGUAGES=eng,deu
+#PROGRESS_INTERVAL=10
+#
+# Tesseract OCR (lightweight, images only):
+#ENABLE_TESSERACT=false
+#TESSERACT_CMD=/usr/bin/tesseract
+#TESSERACT_LANG=eng
+#
+# Custom Processor (your own API):
+#ENABLE_CUSTOM_PROCESSOR=false
+#CUSTOM_PROCESSOR_NAME=my_ocr
+#CUSTOM_PROCESSOR_URL=http://localhost:9000/process
+#CUSTOM_PROCESSOR_API_KEY=
+#CUSTOM_PROCESSOR_TIMEOUT=60
+#CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg,image/png

-# Network mode: URL to Qdrant service
-#QDRANT_URL=http://qdrant:6333
+# ===== SSL/TLS =====
+# For Nextcloud behind reverse proxies with self-signed or private CA certificates
+#
+# Disable TLS certificate verification (insecure, development only):
+#NEXTCLOUD_VERIFY_SSL=false
+#
+# Use a custom CA bundle (path to PEM file):
+#NEXTCLOUD_CA_BUNDLE=/etc/ssl/certs/my-ca.pem
+#
+# Docker example: mount the CA bundle as a volume
+#   docker run -v /path/to/ca.pem:/etc/ssl/certs/my-ca.pem:ro \
+#     -e NEXTCLOUD_CA_BUNDLE=/etc/ssl/certs/my-ca.pem ...

-# Local mode: Path to store vectors (use :memory: for in-memory)
-#QDRANT_LOCATION=:memory:
-
-# API key for network mode (optional)
-#QDRANT_API_KEY=
-
-# Collection name (optional - auto-generated if not set)
-# Auto-generation format: {deployment-id}-{model-name}
-# Allows safe model switching and multi-server deployments
-#QDRANT_COLLECTION=nextcloud_content
+# ===== SECURITY & ADVANCED =====
+# Cookie security (browser UI)
+# Auto-detects from NEXTCLOUD_HOST protocol if not set
+#COOKIE_SECURE=true

 # ============================================
-# Ollama Embedding Service Configuration
+# DEPRECATED VARIABLES (Backward Compatibility)
 # ============================================
-# Ollama endpoint for embeddings (if not set, uses SimpleEmbeddingProvider fallback)
-#OLLAMA_BASE_URL=http://ollama:11434
-
-# Embedding model to use (default: nomic-embed-text, 768 dimensions)
-# Changing this creates a new collection (requires re-embedding all documents)
-#OLLAMA_EMBEDDING_MODEL=nomic-embed-text
-
-# Verify SSL certificates (default: true)
-#OLLAMA_VERIFY_SSL=true
-
-# ============================================
-# Document Chunking Configuration
-# ============================================
-# Configure how documents are split before embedding
-
-# Words per chunk (default: 512)
-# Smaller chunks (256-384): More precise, less context, more storage
-# Larger chunks (768-1024): More context, less precise, less storage
-#DOCUMENT_CHUNK_SIZE=512
-
-# Overlapping words between chunks (default: 50)
-# Recommended: 10-20% of chunk size
-# Preserves context across chunk boundaries
-#DOCUMENT_CHUNK_OVERLAP=50
+# These variables still work but will be removed in v1.0.0
+# Please migrate to new names:
+#
+# Old Name                  → New Name
+# VECTOR_SYNC_ENABLED      → ENABLE_SEMANTIC_SEARCH
+# ENABLE_OFFLINE_ACCESS    → ENABLE_BACKGROUND_OPERATIONS
+#
+# Migration is optional - both old and new names work
+# Deprecation warnings will be logged when old names are used
@@ -0,0 +1,80 @@
+# ============================================
+# OAUTH TOKEN EXCHANGE QUICK START (Advanced)
+# ============================================
+# Advanced OAuth deployment with RFC 8693 token exchange
+# Use for: Deployments requiring separate MCP and Nextcloud tokens
+# Features: Dual-audience tokens, enhanced security boundaries
+#
+# Copy this file to .env and configure
+
+# ===== REQUIRED SETTINGS =====
+# Your Nextcloud instance URL (without trailing slash)
+NEXTCLOUD_HOST=https://nextcloud.example.com
+
+# Enable token exchange mode
+ENABLE_TOKEN_EXCHANGE=true
+
+# ===== REQUIRED: LEAVE USERNAME/PASSWORD EMPTY =====
+# OAuth mode activates when these are NOT set
+NEXTCLOUD_USERNAME=
+NEXTCLOUD_PASSWORD=
+
+# ===== OPTIONAL: EXPLICIT MODE DECLARATION =====
+# Recommended for clarity
+MCP_DEPLOYMENT_MODE=oauth_token_exchange
+
+# ===== OPTIONAL: PRE-REGISTERED OAUTH CLIENT =====
+# If you pre-register the OAuth client instead of using DCR:
+#NEXTCLOUD_OIDC_CLIENT_ID=your-client-id
+#NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
+
+# MCP Server URL (for OAuth redirects)
+NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
+
+# ===== OPTIONAL: TOKEN EXCHANGE TUNING =====
+# Cache TTL for exchanged tokens (default: 300 seconds = 5 minutes)
+TOKEN_EXCHANGE_CACHE_TTL=300
+
+# ===== OPTIONAL: SEMANTIC SEARCH =====
+# AI-powered semantic search with automatic background operation setup
+#
+# Note: ENABLE_SEMANTIC_SEARCH automatically enables background operations
+# in token exchange mode, just like in OAuth single-audience mode
+#
+ENABLE_SEMANTIC_SEARCH=true
+
+# Vector Database (required for semantic search)
+QDRANT_URL=http://qdrant:6333
+
+# Embedding Provider (required for semantic search)
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Token Storage (required for background operations - auto-enabled by semantic search)
+# Generate encryption key: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+TOKEN_ENCRYPTION_KEY=your-encryption-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+
+# ===== OPTIONAL: DOCUMENT PROCESSING =====
+# Extract text from PDFs, images, DOCX for semantic search
+#ENABLE_DOCUMENT_PROCESSING=true
+#ENABLE_UNSTRUCTURED=true
+#UNSTRUCTURED_API_URL=http://unstructured:8000
+
+# ===== TOKEN EXCHANGE MODE EXPLANATION =====
+# In this mode:
+# 1. MCP clients authenticate with tokens scoped to "mcp-server" audience
+# 2. Server exchanges MCP tokens for Nextcloud tokens on each request
+# 3. Provides clear separation between MCP session and Nextcloud access
+# 4. Enables fine-grained token lifecycle management
+#
+# When to use:
+# - Strict security requirements (separate token contexts)
+# - Complex multi-service architectures
+# - Need independent token expiration policies
+#
+# When NOT to use:
+# - Simple deployments (use oauth_single_audience instead)
+# - High-performance requirements (token exchange adds latency)
+
+# For more configuration options, see env.sample
@@ -0,0 +1,77 @@
+# ============================================
+# OAUTH MULTI-USER QUICK START (Recommended)
+# ============================================
+# Multi-user deployment with OAuth authentication
+# Use for: Multi-user production deployments, enhanced security
+# Features: Single-audience tokens, automatic client registration (DCR)
+#
+# Copy this file to .env and configure
+
+# ===== REQUIRED SETTINGS =====
+# Your Nextcloud instance URL (without trailing slash)
+NEXTCLOUD_HOST=https://nextcloud.example.com
+
+# ===== REQUIRED: LEAVE USERNAME/PASSWORD EMPTY =====
+# OAuth mode activates when these are NOT set
+NEXTCLOUD_USERNAME=
+NEXTCLOUD_PASSWORD=
+
+# ===== OPTIONAL: EXPLICIT MODE DECLARATION =====
+# Recommended for clarity
+MCP_DEPLOYMENT_MODE=oauth_single_audience
+
+# ===== OPTIONAL: PRE-REGISTERED OAUTH CLIENT =====
+# If you pre-register the OAuth client instead of using DCR:
+#NEXTCLOUD_OIDC_CLIENT_ID=your-client-id
+#NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
+
+# MCP Server URL (for OAuth redirects)
+NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
+
+# ===== OPTIONAL: SEMANTIC SEARCH (Recommended) =====
+# AI-powered semantic search with automatic background operation setup
+#
+# When you enable semantic search in multi-user mode:
+# 1. ENABLE_SEMANTIC_SEARCH automatically enables background operations
+# 2. Server requests refresh tokens for offline indexing
+# 3. Tokens are stored encrypted in TOKEN_STORAGE_DB
+# 4. No need to set ENABLE_BACKGROUND_OPERATIONS separately!
+#
+ENABLE_SEMANTIC_SEARCH=true
+
+# Vector Database (required for semantic search)
+QDRANT_URL=http://qdrant:6333
+# OR for in-memory mode:
+#QDRANT_LOCATION=:memory:
+
+# Embedding Provider (required for semantic search)
+# Option 1: Ollama (recommended for local deployment)
+OLLAMA_BASE_URL=http://ollama:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Option 2: Amazon Bedrock (for AWS deployments)
+#AWS_REGION=us-east-1
+#BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
+
+# Token Storage (required for background operations - auto-enabled by semantic search)
+# Generate encryption key: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+TOKEN_ENCRYPTION_KEY=your-encryption-key-here
+TOKEN_STORAGE_DB=/app/data/tokens.db
+
+# ===== OPTIONAL: DOCUMENT PROCESSING =====
+# Extract text from PDFs, images, DOCX for semantic search
+#ENABLE_DOCUMENT_PROCESSING=true
+#ENABLE_UNSTRUCTURED=true
+#UNSTRUCTURED_API_URL=http://unstructured:8000
+
+# ===== SUMMARY OF AUTO-ENABLEMENT =====
+# With ENABLE_SEMANTIC_SEARCH=true in OAuth mode:
+# ✅ Background operations enabled automatically
+# ✅ Refresh token storage enabled automatically
+# ✅ OAuth credentials required (DCR or pre-registered)
+# ✅ Encryption key required for token storage
+#
+# You only need to set ENABLE_SEMANTIC_SEARCH and provide the required
+# infrastructure (Qdrant, Ollama, encryption key). The rest is automatic!
+
+# For more advanced configuration, see env.sample
@@ -0,0 +1,37 @@
+# ============================================
+# SINGLE-USER BASICAUTH QUICK START
+# ============================================
+# Simplest deployment mode - one user, credentials in environment
+# Use for: Personal instances, local development, testing
+#
+# Copy this file to .env and fill in your credentials
+
+# ===== REQUIRED SETTINGS =====
+# Your Nextcloud instance URL (without trailing slash)
+NEXTCLOUD_HOST=http://localhost:8080
+
+# Your Nextcloud credentials
+NEXTCLOUD_USERNAME=admin
+NEXTCLOUD_PASSWORD=password
+
+# ===== OPTIONAL: EXPLICIT MODE DECLARATION =====
+# Recommended to avoid ambiguity
+MCP_DEPLOYMENT_MODE=single_user_basic
+
+# ===== OPTIONAL: SEMANTIC SEARCH =====
+# Uncomment to enable AI-powered semantic search
+# Requires: Qdrant + embedding provider (Ollama or Bedrock)
+#
+#ENABLE_SEMANTIC_SEARCH=true
+#QDRANT_LOCATION=:memory:
+#OLLAMA_BASE_URL=http://ollama:11434
+#OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# ===== OPTIONAL: DOCUMENT PROCESSING =====
+# Extract text from PDFs, images, DOCX for semantic search
+#ENABLE_DOCUMENT_PROCESSING=true
+#ENABLE_UNSTRUCTURED=true
+#UNSTRUCTURED_API_URL=http://unstructured:8000
+
+# That's it! Single-user mode is the simplest to configure.
+# For more options, see env.sample
@@ -0,0 +1,133 @@
+"""Alembic environment configuration for nextcloud-mcp-server.
+
+This module configures how Alembic runs database migrations for the
+token storage database. It supports both online and offline migration modes.
+
+Uses anyio for async operations, consistent with the project's async patterns.
+"""
+
+import logging
+from pathlib import Path
+
+import anyio
+from sqlalchemy import pool
+from sqlalchemy.engine import Connection
+from sqlalchemy.ext.asyncio import async_engine_from_config
+
+from alembic import context
+
+# Configure logging
+logger = logging.getLogger("alembic.env")
+
+# This is the Alembic Config object, which provides
+# access to the values within the .ini file in use.
+config = context.config
+
+# Update script location to point to package location
+# This allows alembic to find migrations when installed in site-packages
+script_location = Path(__file__).parent
+config.set_main_option("script_location", str(script_location))
+
+# We don't use SQLAlchemy models, so target_metadata is None
+# Migrations will be written manually using op.execute() for raw SQL
+target_metadata = None
+
+
+def get_database_url() -> str:
+    """
+    Get the database URL from Alembic config or environment.
+
+    The URL can be set in alembic.ini or passed via -x database_url=...
+    when running Alembic commands.
+
+    Returns:
+        Database URL (SQLite URL format)
+    """
+    # Check if URL is passed via -x database_url=...
+    url = context.get_x_argument(as_dictionary=True).get("database_url")
+
+    if not url:
+        # Fall back to alembic.ini configuration
+        url = config.get_main_option("sqlalchemy.url")
+
+    if not url:
+        # Default to /app/data/tokens.db for Docker deployments
+        db_path = Path("/app/data/tokens.db")
+        url = f"sqlite+aiosqlite:///{db_path}"
+        logger.warning(
+            f"No database URL configured, using default: {url}. "
+            "Set sqlalchemy.url in alembic.ini or pass -x database_url=..."
+        )
+
+    return url
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode.
+
+    This configures the context with just a URL and not an Engine,
+    though an Engine is acceptable here as well. By skipping the
+    Engine creation we don't even need a DBAPI to be available.
+
+    Calls to context.execute() here emit the given string to the
+    script output.
+
+    This mode is useful for generating SQL scripts without database access.
+    """
+    url = get_database_url()
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def do_run_migrations(connection: Connection) -> None:
+    """Execute migrations within a database connection."""
+    context.configure(connection=connection, target_metadata=target_metadata)
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+async def run_async_migrations() -> None:
+    """Run migrations in 'online' mode with async support.
+
+    In this scenario we create an async Engine and associate
+    a connection with the context.
+    """
+    # Get database URL and update config
+    url = get_database_url()
+    config.set_main_option("sqlalchemy.url", url)
+
+    # Create async engine
+    connectable = async_engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,  # Don't pool connections for migrations
+    )
+
+    async with connectable.connect() as connection:
+        await connection.run_sync(do_run_migrations)
+
+    await connectable.dispose()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode.
+
+    This function is called from storage.py's initialize() method via
+    anyio.to_thread.run_sync(), so it always runs in a worker thread
+    with its own event loop. We can safely use anyio.run() here.
+    """
+    anyio.run(run_async_migrations)
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
@@ -0,0 +1,185 @@
+"""Initial schema for token storage database
+
+This migration creates the initial database schema including:
+- refresh_tokens: OAuth refresh tokens and user profiles
+- audit_logs: Audit trail for security events
+- oauth_clients: OAuth client credentials (DCR)
+- oauth_sessions: OAuth flow session state (ADR-004 Progressive Consent)
+- registered_webhooks: Webhook registration tracking (both OAuth and BasicAuth)
+- schema_version: Legacy schema version tracking (deprecated, use alembic_version)
+
+Revision ID: 001
+Revises:
+Create Date: 2025-12-17 22:00:00.000000
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "001"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Create initial database schema."""
+
+    # Refresh tokens table (OAuth mode only, for background jobs)
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS refresh_tokens (
+            user_id TEXT PRIMARY KEY,
+            encrypted_token BLOB NOT NULL,
+            expires_at INTEGER,
+            created_at INTEGER NOT NULL,
+            updated_at INTEGER NOT NULL,
+            -- ADR-004 Progressive Consent fields
+            flow_type TEXT DEFAULT 'hybrid',
+            token_audience TEXT DEFAULT 'nextcloud',
+            provisioned_at INTEGER,
+            provisioning_client_id TEXT,
+            scopes TEXT,
+            -- Browser session profile cache
+            user_profile TEXT,
+            profile_cached_at INTEGER
+        )
+        """
+    )
+
+    # Audit logs table (both OAuth and BasicAuth modes)
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS audit_logs (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            timestamp INTEGER NOT NULL,
+            event TEXT NOT NULL,
+            user_id TEXT NOT NULL,
+            resource_type TEXT,
+            resource_id TEXT,
+            auth_method TEXT,
+            hostname TEXT
+        )
+        """
+    )
+
+    # Index on audit logs for efficient queries
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_audit_user_timestamp
+        ON audit_logs(user_id, timestamp)
+        """
+    )
+
+    # OAuth client credentials storage (OAuth mode only)
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS oauth_clients (
+            id INTEGER PRIMARY KEY,
+            client_id TEXT UNIQUE NOT NULL,
+            encrypted_client_secret BLOB NOT NULL,
+            client_id_issued_at INTEGER NOT NULL,
+            client_secret_expires_at INTEGER NOT NULL,
+            redirect_uris TEXT NOT NULL,
+            encrypted_registration_access_token BLOB,
+            registration_client_uri TEXT,
+            created_at INTEGER NOT NULL,
+            updated_at INTEGER NOT NULL
+        )
+        """
+    )
+
+    # OAuth flow sessions (ADR-004 Progressive Consent)
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS oauth_sessions (
+            session_id TEXT PRIMARY KEY,
+            client_id TEXT,
+            client_redirect_uri TEXT NOT NULL,
+            state TEXT,
+            code_challenge TEXT,
+            code_challenge_method TEXT,
+            mcp_authorization_code TEXT UNIQUE,
+            idp_access_token TEXT,
+            idp_refresh_token TEXT,
+            user_id TEXT,
+            created_at INTEGER NOT NULL,
+            expires_at INTEGER NOT NULL,
+            -- ADR-004 Progressive Consent fields
+            flow_type TEXT DEFAULT 'hybrid',
+            requested_scopes TEXT,
+            granted_scopes TEXT,
+            is_provisioning BOOLEAN DEFAULT FALSE
+        )
+        """
+    )
+
+    # Index for MCP authorization code lookups
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_oauth_sessions_mcp_code
+        ON oauth_sessions(mcp_authorization_code)
+        """
+    )
+
+    # Legacy schema version tracking table
+    # NOTE: This is deprecated in favor of Alembic's alembic_version table
+    # Kept for backward compatibility with pre-Alembic databases
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS schema_version (
+            version INTEGER PRIMARY KEY,
+            applied_at REAL NOT NULL
+        )
+        """
+    )
+
+    # Registered webhooks tracking (both BasicAuth and OAuth modes)
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS registered_webhooks (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            webhook_id INTEGER NOT NULL UNIQUE,
+            preset_id TEXT NOT NULL,
+            created_at REAL NOT NULL
+        )
+        """
+    )
+
+    # Indexes for efficient webhook queries
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_webhooks_preset
+        ON registered_webhooks(preset_id)
+        """
+    )
+
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_webhooks_created
+        ON registered_webhooks(created_at)
+        """
+    )
+
+
+def downgrade() -> None:
+    """Drop all tables and indexes.
+
+    WARNING: This will destroy all data in the database!
+    Use with extreme caution.
+    """
+
+    # Drop indexes first
+    op.execute("DROP INDEX IF EXISTS idx_webhooks_created")
+    op.execute("DROP INDEX IF EXISTS idx_webhooks_preset")
+    op.execute("DROP INDEX IF EXISTS idx_oauth_sessions_mcp_code")
+    op.execute("DROP INDEX IF EXISTS idx_audit_user_timestamp")
+
+    # Drop tables
+    op.execute("DROP TABLE IF EXISTS registered_webhooks")
+    op.execute("DROP TABLE IF EXISTS schema_version")
+    op.execute("DROP TABLE IF EXISTS oauth_sessions")
+    op.execute("DROP TABLE IF EXISTS oauth_clients")
+    op.execute("DROP TABLE IF EXISTS audit_logs")
+    op.execute("DROP TABLE IF EXISTS refresh_tokens")
@@ -0,0 +1,50 @@
+"""Add app_passwords table for multi-user BasicAuth mode
+
+This migration adds support for storing app passwords that are provisioned
+via Astrolabe's personal settings. This enables background sync in
+multi-user BasicAuth mode without requiring OAuth.
+
+Revision ID: 002
+Revises: 001
+Create Date: 2026-01-13 12:00:00.000000
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "002"
+down_revision = "001"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add app_passwords table for multi-user BasicAuth mode."""
+
+    # App passwords table for multi-user BasicAuth background sync
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS app_passwords (
+            user_id TEXT PRIMARY KEY,
+            encrypted_password BLOB NOT NULL,
+            created_at INTEGER NOT NULL,
+            updated_at INTEGER NOT NULL
+        )
+        """
+    )
+
+    # Index for efficient user lookups
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_app_passwords_updated
+        ON app_passwords(updated_at)
+        """
+    )
+
+
+def downgrade() -> None:
+    """Drop app_passwords table."""
+
+    op.execute("DROP INDEX IF EXISTS idx_app_passwords_updated")
+    op.execute("DROP TABLE IF EXISTS app_passwords")
@@ -0,0 +1,95 @@
+"""Add scopes and login flow sessions for Login Flow v2
+
+This migration adds support for:
+1. Scoped app passwords (scopes column + username column on app_passwords)
+2. Login Flow v2 session tracking (login_flow_sessions table)
+
+Nullable scopes preserves backward compat: NULL = legacy app password = all scopes allowed.
+
+Revision ID: 003
+Revises: 002
+Create Date: 2026-02-27 12:00:00.000000
+
+"""
+
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "003"
+down_revision = "002"
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    """Add scopes/username to app_passwords and create login_flow_sessions."""
+
+    # Add scopes column (nullable JSON array, NULL = all scopes allowed)
+    op.execute(
+        """
+        ALTER TABLE app_passwords ADD COLUMN scopes TEXT
+        """
+    )
+
+    # Add username column (Nextcloud loginName from Login Flow v2)
+    op.execute(
+        """
+        ALTER TABLE app_passwords ADD COLUMN username TEXT
+        """
+    )
+
+    # Login Flow v2 session tracking
+    op.execute(
+        """
+        CREATE TABLE IF NOT EXISTS login_flow_sessions (
+            user_id TEXT PRIMARY KEY,
+            encrypted_poll_token BLOB NOT NULL,
+            poll_endpoint TEXT NOT NULL,
+            requested_scopes TEXT,
+            created_at INTEGER NOT NULL,
+            expires_at INTEGER NOT NULL
+        )
+        """
+    )
+
+    # Index for efficient cleanup of expired sessions
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_login_flow_sessions_expires
+        ON login_flow_sessions(expires_at)
+        """
+    )
+
+
+def downgrade() -> None:
+    """Drop login_flow_sessions and remove added columns."""
+
+    op.execute("DROP INDEX IF EXISTS idx_login_flow_sessions_expires")
+    op.execute("DROP TABLE IF EXISTS login_flow_sessions")
+
+    # SQLite doesn't support DROP COLUMN before 3.35.0
+    # Recreate app_passwords without the new columns
+    op.execute(
+        """
+        CREATE TABLE app_passwords_backup (
+            user_id TEXT PRIMARY KEY,
+            encrypted_password BLOB NOT NULL,
+            created_at INTEGER NOT NULL,
+            updated_at INTEGER NOT NULL
+        )
+        """
+    )
+    op.execute(
+        """
+        INSERT INTO app_passwords_backup (user_id, encrypted_password, created_at, updated_at)
+        SELECT user_id, encrypted_password, created_at, updated_at FROM app_passwords
+        """
+    )
+    op.execute("DROP TABLE app_passwords")
+    op.execute("ALTER TABLE app_passwords_backup RENAME TO app_passwords")
+    op.execute(
+        """
+        CREATE INDEX IF NOT EXISTS idx_app_passwords_updated
+        ON app_passwords(updated_at)
+        """
+    )
@@ -0,0 +1,86 @@
+"""Management API for Nextcloud MCP Server.
+
+Provides REST endpoints for the Nextcloud PHP app to query server status,
+user sessions, and vector sync metrics. All endpoints use OAuth bearer token
+authentication via the UnifiedTokenVerifier.
+
+This package is organized into modules by domain:
+- management.py: Server status, user sessions, shared helpers
+- passwords.py: App password provisioning for multi-user BasicAuth
+- webhooks.py: Webhook registration management
+- visualization.py: Search and PDF visualization endpoints
+"""
+
+from nextcloud_mcp_server.api.access import (
+    get_user_access,
+    list_supported_scopes,
+    update_user_scopes,
+)
+
+# Re-export all public functions for backward compatibility
+from nextcloud_mcp_server.api.management import (
+    __version__,
+    _parse_float_param,
+    _parse_int_param,
+    _sanitize_error_for_client,
+    _validate_query_string,
+    extract_bearer_token,
+    get_server_status,
+    get_user_session,
+    get_vector_sync_status,
+    revoke_user_access,
+    validate_token_and_get_user,
+)
+from nextcloud_mcp_server.api.passwords import (
+    delete_app_password,
+    get_app_password_status,
+    provision_app_password,
+)
+from nextcloud_mcp_server.api.visualization import (
+    get_chunk_context,
+    get_pdf_preview,
+    unified_search,
+    vector_search,
+)
+from nextcloud_mcp_server.api.webhooks import (
+    create_webhook,
+    delete_webhook,
+    get_installed_apps,
+    list_webhooks,
+)
+
+__all__ = [
+    # Access endpoints (from access.py)
+    "get_user_access",
+    "update_user_scopes",
+    "list_supported_scopes",
+    # Version
+    "__version__",
+    # Shared helpers (from management.py)
+    "extract_bearer_token",
+    "validate_token_and_get_user",
+    "_sanitize_error_for_client",
+    "_parse_int_param",
+    "_parse_float_param",
+    "_validate_query_string",
+    # Status endpoints (from management.py)
+    "get_server_status",
+    "get_vector_sync_status",
+    # Session endpoints (from management.py)
+    "get_user_session",
+    "revoke_user_access",
+    # Password endpoints (from passwords.py)
+    "provision_app_password",
+    "get_app_password_status",
+    "delete_app_password",
+    # Webhook endpoints (from webhooks.py)
+    "get_installed_apps",
+    "list_webhooks",
+    "create_webhook",
+    "delete_webhook",
+    # Visualization endpoints (from visualization.py)
+    "unified_search",
+    "vector_search",
+    "get_chunk_context",
+    "get_pdf_preview",
+]
@@ -0,0 +1,173 @@
+"""Access and scope management API endpoints.
+
+Provides REST API endpoints for querying and managing user access status
+and application-level scopes for Login Flow v2 mode.
+"""
+
+import logging
+
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+from nextcloud_mcp_server.api.management import _sanitize_error_for_client
+from nextcloud_mcp_server.api.passwords import (
+    _extract_basic_auth,
+    _get_app_password_storage,
+)
+from nextcloud_mcp_server.auth.scope_authorization import invalidate_scope_cache
+from nextcloud_mcp_server.models.auth import ALL_SUPPORTED_SCOPES
+
+logger = logging.getLogger(__name__)
+
+
+async def get_user_access(request: Request) -> JSONResponse:
+    """GET /api/v1/users/{user_id}/access - Get user's provisioned access and scopes.
+
+    Returns the user's current provisioning status, granted scopes, and metadata.
+    Requires BasicAuth with the user's credentials.
+    """
+    path_user_id = request.path_params.get("user_id")
+    if not path_user_id:
+        return JSONResponse(
+            {"success": False, "error": "Missing user_id in path"},
+            status_code=400,
+        )
+
+    username, _, error_response = _extract_basic_auth(request, path_user_id)
+    if error_response is not None:
+        return error_response
+
+    try:
+        storage = await _get_app_password_storage(request)
+        data = await storage.get_app_password_with_scopes(username)
+
+        if data is None:
+            return JSONResponse(
+                {
+                    "success": True,
+                    "user_id": username,
+                    "provisioned": False,
+                    "scopes": None,
+                    "username": None,
+                }
+            )
+
+        return JSONResponse(
+            {
+                "success": True,
+                "user_id": username,
+                "provisioned": True,
+                "scopes": data["scopes"],
+                "username": data.get("username"),
+                "created_at": data.get("created_at"),
+                "updated_at": data.get("updated_at"),
+            }
+        )
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "get_user_access")
+        return JSONResponse(
+            {"success": False, "error": error_msg},
+            status_code=500,
+        )
+
+
+async def update_user_scopes(request: Request) -> JSONResponse:
+    """PATCH /api/v1/users/{user_id}/scopes - Update user's application-level scopes.
+
+    Accepts JSON body with:
+    - scopes: list[str] - New scope set to apply
+
+    This only updates the stored scopes, not the app password itself.
+    The app password remains valid; scope enforcement is application-level.
+
+    Security note: This endpoint allows direct scope modification without
+    re-authenticating via Login Flow. The caller must authenticate with
+    valid BasicAuth credentials (user_id + app_password), which serves
+    as the authorization check.
+    """
+    path_user_id = request.path_params.get("user_id")
+    if not path_user_id:
+        return JSONResponse(
+            {"success": False, "error": "Missing user_id in path"},
+            status_code=400,
+        )
+
+    username, _, error_response = _extract_basic_auth(request, path_user_id)
+    if error_response is not None:
+        return error_response
+
+    try:
+        body = await request.json()
+    except Exception:
+        return JSONResponse(
+            {"success": False, "error": "Invalid JSON body"},
+            status_code=400,
+        )
+
+    scopes = body.get("scopes")
+    if scopes is None or not isinstance(scopes, list):
+        return JSONResponse(
+            {"success": False, "error": "scopes must be a list of strings"},
+            status_code=400,
+        )
+
+    # Validate scopes
+    invalid = [s for s in scopes if s not in ALL_SUPPORTED_SCOPES]
+    if invalid:
+        return JSONResponse(
+            {
+                "success": False,
+                "error": f"Invalid scopes: {', '.join(invalid)}",
+                "valid_scopes": sorted(ALL_SUPPORTED_SCOPES),
+            },
+            status_code=400,
+        )
+
+    try:
+        storage = await _get_app_password_storage(request)
+        existing = await storage.get_app_password_with_scopes(username)
+
+        if existing is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "No app password provisioned for this user",
+                },
+                status_code=404,
+            )
+
+        # Update scopes only (no decrypt/re-encrypt of the password)
+        await storage.update_app_password_scopes(
+            user_id=username,
+            scopes=scopes,
+        )
+
+        # Invalidate scope cache so subsequent tool calls see updated scopes
+        invalidate_scope_cache(username)
+
+        return JSONResponse(
+            {
+                "success": True,
+                "user_id": username,
+                "scopes": scopes,
+                "message": "Scopes updated successfully",
+            }
+        )
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "update_user_scopes")
+        return JSONResponse(
+            {"success": False, "error": error_msg},
+            status_code=500,
+        )
+
+
+async def list_supported_scopes(_: Request) -> JSONResponse:
+    """GET /api/v1/scopes - List all supported application-level scopes."""
+    return JSONResponse(
+        {
+            "success": True,
+            "scopes": sorted(ALL_SUPPORTED_SCOPES),
+        }
+    )
@@ -0,0 +1,514 @@
+"""Management API endpoints for Nextcloud PHP app integration.
+
+ADR-018: Provides REST API endpoints for the Nextcloud PHP app to query:
+- Server status and version
+- User session information and background access status
+- Vector sync metrics
+
+All endpoints use OAuth bearer token authentication via UnifiedTokenVerifier.
+The PHP app obtains tokens through PKCE flow and uses them to access these endpoints.
+
+Shared helper functions for other API modules are also exported from here:
+- extract_bearer_token: Extract OAuth token from request
+- validate_token_and_get_user: Validate token and get user ID
+- _sanitize_error_for_client: Return safe error messages
+- _parse_int_param, _parse_float_param, _validate_query_string: Parameter validation
+"""
+
+import logging
+import time
+from importlib.metadata import version
+from typing import Any
+
+from qdrant_client.models import Filter
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.config_validators import AuthMode, detect_auth_mode
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+# Get package version from metadata
+__version__ = version("nextcloud-mcp-server")
+
+# Track server start time for uptime calculation
+_server_start_time = time.time()
+
+
+def extract_bearer_token(request: Request) -> str | None:
+    """Extract OAuth bearer token from Authorization header.
+
+    Args:
+        request: Starlette request
+
+    Returns:
+        Token string or None if no valid Authorization header
+    """
+    auth_header = request.headers.get("Authorization")
+    if not auth_header:
+        return None
+
+    # Parse "Bearer <token>"
+    parts = auth_header.split()
+    if len(parts) != 2 or parts[0].lower() != "bearer":
+        return None
+
+    return parts[1]
+
+
+async def validate_token_and_get_user(
+    request: Request,
+) -> tuple[str, dict[str, Any]]:
+    """Validate OAuth bearer token and extract user ID.
+
+    Uses verify_token_for_management_api which accepts any valid Nextcloud OIDC
+    token (not just MCP-audience tokens). This is needed because Astrolabe
+    (NC PHP app) uses its own OAuth client, separate from MCP server's client.
+
+    Security Model:
+    ~~~~~~~~~~~~~~~
+    - **Authentication** (this function): Verifies token is cryptographically valid
+      and extracts user identity from the `sub` claim.
+    - **Authorization** (calling endpoints): Each endpoint MUST verify that the
+      authenticated user owns the requested resource. For example:
+      - GET /users/{user_id}/session: Checks token_user_id == path_user_id (403 if mismatch)
+      - POST /users/{user_id}/revoke: Checks token_user_id == path_user_id (403 if mismatch)
+
+    This separation ensures that even without audience validation, users can only
+    access their own resources. Cross-user access is blocked at the authorization layer.
+
+    Args:
+        request: Starlette request with Authorization header
+
+    Returns:
+        Tuple of (user_id, validated_token_data)
+
+    Raises:
+        Exception: If token is invalid or missing
+    """
+    token = extract_bearer_token(request)
+    if not token:
+        raise ValueError("Missing Authorization header")
+
+    # Get token verifier from app state
+    # Note: This is set in app.py starlette_lifespan for OAuth mode
+    token_verifier = request.app.state.oauth_context["token_verifier"]
+
+    # Validate token for management API (handles both JWT and opaque tokens)
+    # Uses verify_token_for_management_api which accepts any valid Nextcloud token
+    # without requiring MCP audience - needed for Astrolabe integration (ADR-018)
+    access_token = await token_verifier.verify_token_for_management_api(token)
+
+    if not access_token:
+        raise ValueError("Token validation failed")
+
+    # Extract user ID from AccessToken.resource field (set during verification)
+    user_id = access_token.resource
+    if not user_id:
+        raise ValueError("Token missing user identifier")
+
+    # Return user_id and a dict with token info for compatibility
+    validated = {
+        "sub": user_id,
+        "client_id": access_token.client_id,
+        "scopes": access_token.scopes,
+        "expires_at": access_token.expires_at,
+    }
+
+    return user_id, validated
+
+
+def _sanitize_error_for_client(error: Exception, context: str = "") -> str:
+    """
+    Return a safe, generic error message for clients.
+
+    Detailed error is logged internally but not exposed to clients to prevent
+    information leakage (database paths, API URLs, tokens, etc.).
+
+    Args:
+        error: The exception that occurred
+        context: Optional context for logging (e.g., "revoke_user_access")
+
+    Returns:
+        Generic error message safe for client consumption
+    """
+    # Log detailed error for debugging
+    logger.error(f"Error in {context}: {error}", exc_info=True)
+
+    # Return generic message
+    return "An internal error occurred. Please contact your administrator."
+
+
+def _parse_int_param(
+    value: str | None,
+    default: int,
+    min_val: int,
+    max_val: int,
+    param_name: str,
+) -> int:
+    """Parse and validate integer parameter."""
+    if value is None:
+        return default
+    try:
+        parsed = int(value)
+    except ValueError:
+        raise ValueError(f"Invalid {param_name}: must be an integer")
+    if parsed < min_val or parsed > max_val:
+        raise ValueError(
+            f"Invalid {param_name}: must be between {min_val} and {max_val}"
+        )
+    return parsed
+
+
+def _parse_float_param(
+    value: Any,
+    default: float,
+    min_val: float,
+    max_val: float,
+    param_name: str,
+) -> float:
+    """Parse and validate float parameter."""
+    if value is None:
+        return default
+    try:
+        parsed = float(value)
+    except (ValueError, TypeError):
+        raise ValueError(f"Invalid {param_name}: must be a number")
+    if parsed < min_val or parsed > max_val:
+        raise ValueError(
+            f"Invalid {param_name}: must be between {min_val} and {max_val}"
+        )
+    return parsed
+
+
+def _validate_query_string(query: str, max_length: int = 10000) -> None:
+    """Validate query string length."""
+    if len(query) > max_length:
+        raise ValueError(f"Query too long: maximum {max_length} characters")
+
+
+async def get_server_status(request: Request) -> JSONResponse:
+    """GET /api/v1/status - Server status and version.
+
+    Returns basic server information including version, auth mode,
+    vector sync status, and uptime.
+
+    Public endpoint - no authentication required.
+    """
+    # Public endpoint - no authentication required
+
+    # Get configuration
+    settings = get_settings()
+
+    # Calculate uptime
+    uptime_seconds = int(time.time() - _server_start_time)
+
+    # Determine auth mode using proper mode detection
+    mode = detect_auth_mode(settings)
+
+    # Map deployment mode to auth_mode for API response
+    # This helps clients (like Astrolabe) determine which auth flow to use
+    if mode == AuthMode.OAUTH_SINGLE_AUDIENCE or mode == AuthMode.OAUTH_TOKEN_EXCHANGE:
+        auth_mode = "oauth"
+    elif mode == AuthMode.MULTI_USER_BASIC:
+        auth_mode = "multi_user_basic"
+    elif mode == AuthMode.SINGLE_USER_BASIC:
+        auth_mode = "basic"
+    elif mode == AuthMode.SMITHERY_STATELESS:
+        auth_mode = "smithery"
+    else:
+        auth_mode = "unknown"
+
+    response_data = {
+        "version": __version__,
+        "auth_mode": auth_mode,
+        "vector_sync_enabled": settings.vector_sync_enabled,
+        "uptime_seconds": uptime_seconds,
+        "management_api_version": "1.0",
+    }
+
+    # Add app password support indicator for multi-user BasicAuth mode
+    if mode == AuthMode.MULTI_USER_BASIC:
+        response_data["supports_app_passwords"] = settings.enable_offline_access
+
+    # Include OIDC configuration for client discovery (e.g. Astrolabe PHP app).
+    # Always attempt to provide oidc.discovery_url so clients can discover the
+    # IdP regardless of the current auth mode. This enables smoother transitions
+    # between auth modes and lets Astrolabe pre-discover OIDC endpoints.
+    oidc_config: dict[str, str] = {}
+
+    if settings.oidc_discovery_url:
+        # Explicit OIDC_DISCOVERY_URL takes precedence
+        oidc_config["discovery_url"] = settings.oidc_discovery_url
+    elif settings.nextcloud_host:
+        # Auto-derive from NEXTCLOUD_HOST — Nextcloud exposes OIDC discovery
+        # at the standard well-known path when user_oidc is enabled
+        host = settings.nextcloud_host.rstrip("/")
+        oidc_config["discovery_url"] = f"{host}/.well-known/openid-configuration"
+
+    if settings.oidc_issuer:
+        oidc_config["issuer"] = settings.oidc_issuer
+
+    if oidc_config:
+        response_data["oidc"] = oidc_config
+
+    return JSONResponse(response_data)
+
+
+async def get_vector_sync_status(request: Request) -> JSONResponse:
+    """GET /api/v1/vector-sync/status - Vector sync metrics.
+
+    Returns real-time indexing status and metrics.
+
+    Requires: VECTOR_SYNC_ENABLED=true
+
+    Public endpoint - no authentication required.
+    """
+    # Public endpoint - no authentication required
+
+    settings = get_settings()
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"error": "Vector sync is disabled on this server"},
+            status_code=404,
+        )
+
+    try:
+        # Get document receive stream from app state (set by starlette_lifespan in app.py)
+        document_receive_stream = getattr(
+            request.app.state, "document_receive_stream", None
+        )
+
+        if document_receive_stream is None:
+            logger.debug("document_receive_stream not available in app state")
+            return JSONResponse(
+                {
+                    "status": "unknown",
+                    "indexed_documents": 0,
+                    "pending_documents": 0,
+                    "message": "Vector sync stream not initialized",
+                }
+            )
+
+        # Get pending count from stream statistics
+        stream_stats = document_receive_stream.statistics()
+        pending_count = stream_stats.current_buffer_used
+
+        # Get Qdrant client and query indexed count
+        indexed_count = 0
+        try:
+            qdrant_client = await get_qdrant_client()
+
+            # Count documents in collection, excluding placeholders
+            count_result = await qdrant_client.count(
+                collection_name=settings.get_collection_name(),
+                count_filter=Filter(must=[get_placeholder_filter()]),
+            )
+            indexed_count = count_result.count
+
+        except Exception as e:
+            logger.warning(f"Failed to query Qdrant for indexed count: {e}")
+            # Continue with indexed_count = 0
+
+        # Determine status
+        status = "syncing" if pending_count > 0 else "idle"
+
+        return JSONResponse(
+            {
+                "status": status,
+                "indexed_documents": indexed_count,
+                "pending_documents": pending_count,
+            }
+        )
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "get_vector_sync_status")
+        return JSONResponse(
+            {"error": error_msg},
+            status_code=500,
+        )
+
+
+async def get_user_session(request: Request) -> JSONResponse:
+    """GET /api/v1/users/{user_id}/session - User session details.
+
+    Returns information about the user's MCP session including:
+    - Background access status (offline_access)
+    - IdP profile information
+
+    Requires OAuth bearer token. The user_id in the path must match
+    the user_id in the token.
+    """
+    try:
+        # Validate OAuth token and extract user
+        token_user_id, validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "get_user_session_auth")
+        return JSONResponse(
+            {"error": error_msg},
+            status_code=401,
+        )
+
+    # Get user_id from path
+    path_user_id = request.path_params.get("user_id")
+
+    # Verify token user matches requested user
+    if token_user_id != path_user_id:
+        logger.warning(
+            f"User {token_user_id} attempted to access session for {path_user_id}"
+        )
+        return JSONResponse(
+            {
+                "error": "Forbidden",
+                "message": "Cannot access another user's session",
+            },
+            status_code=403,
+        )
+
+    # Check if offline access is enabled
+    # Use settings.enable_offline_access which handles both ENABLE_BACKGROUND_OPERATIONS (new)
+    # and ENABLE_OFFLINE_ACCESS (deprecated) environment variables
+    settings = get_settings()
+    enable_offline_access = settings.enable_offline_access
+
+    if not enable_offline_access:
+        # Offline access disabled - return minimal session info
+        return JSONResponse(
+            {
+                "session_id": token_user_id,
+                "background_access_granted": False,
+            }
+        )
+
+    # Get refresh token storage from app state
+    storage = request.app.state.oauth_context.get("storage")
+    if not storage:
+        logger.error("Refresh token storage not available in app state")
+        return JSONResponse(
+            {
+                "session_id": token_user_id,
+                "background_access_granted": False,
+                "error": "Storage not configured",
+            }
+        )
+
+    try:
+        # Check if user has refresh token stored
+        refresh_token_data = await storage.get_refresh_token(token_user_id)
+
+        if not refresh_token_data:
+            # No refresh token - user hasn't provisioned background access
+            return JSONResponse(
+                {
+                    "session_id": token_user_id,
+                    "background_access_granted": False,
+                }
+            )
+
+        # User has background access - get profile info
+        profile = await storage.get_user_profile(token_user_id)
+
+        response_data = {
+            "session_id": token_user_id,
+            "background_access_granted": True,
+            "background_access_details": {
+                "granted_at": refresh_token_data.get("created_at"),
+                "scopes": refresh_token_data.get("scope", "").split(),
+            },
+        }
+
+        if profile:
+            response_data["idp_profile"] = profile
+
+        return JSONResponse(response_data)
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "get_user_session")
+        return JSONResponse(
+            {"error": error_msg},
+            status_code=500,
+        )
+
+
+async def revoke_user_access(request: Request) -> JSONResponse:
+    """POST /api/v1/users/{user_id}/revoke - Revoke user's background access.
+
+    Deletes the user's stored refresh token, removing their offline access.
+
+    Requires OAuth bearer token. The user_id in the path must match
+    the user_id in the token.
+    """
+    try:
+        # Validate OAuth token and extract user
+        token_user_id, validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/users/{{user_id}}/revoke: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "revoke_user_access"),
+            },
+            status_code=401,
+        )
+
+    # Get user_id from path
+    path_user_id = request.path_params.get("user_id")
+
+    # Verify token user matches requested user
+    if token_user_id != path_user_id:
+        logger.warning(
+            f"User {token_user_id} attempted to revoke access for {path_user_id}"
+        )
+        return JSONResponse(
+            {
+                "error": "Forbidden",
+                "message": "Cannot revoke another user's access",
+            },
+            status_code=403,
+        )
+
+    # Get token broker from app state
+    oauth_context = request.app.state.oauth_context
+    if oauth_context is None:
+        logger.error("OAuth context not initialized")
+        return JSONResponse(
+            {"error": "OAuth not enabled"},
+            status_code=500,
+        )
+
+    token_broker = oauth_context.get("token_broker")
+    if not token_broker:
+        logger.error("Token broker not available in app state")
+        return JSONResponse(
+            {"error": "Token broker not configured"},
+            status_code=500,
+        )
+
+    try:
+        # Delete refresh token from storage
+        await token_broker.storage.delete_refresh_token(token_user_id)
+
+        # CRITICAL: Invalidate all cached tokens for this user
+        await token_broker.cache.invalidate(token_user_id)
+
+        logger.info(
+            f"Revoked background access for user {token_user_id} (cache and storage cleared)"
+        )
+
+        return JSONResponse(
+            {
+                "success": True,
+                "message": f"Background access revoked for {token_user_id}",
+            }
+        )
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "revoke_user_access")
+        return JSONResponse(
+            {"success": False, "error": error_msg},
+            status_code=500,
+        )
@@ -0,0 +1,441 @@
+"""App password management API endpoints.
+
+Provides REST API endpoints for app password provisioning in multi-user BasicAuth mode.
+These endpoints are used by the Nextcloud PHP app (Astrolabe) to:
+- Store app passwords for background sync operations
+- Check app password status
+- Delete stored app passwords
+
+Authentication is via BasicAuth with the user's Nextcloud credentials.
+Passwords are validated against Nextcloud before being stored.
+"""
+
+import base64
+import logging
+import re
+import time
+from collections import defaultdict
+
+import httpx
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+from nextcloud_mcp_server.api.management import _sanitize_error_for_client
+from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
+from nextcloud_mcp_server.config import get_settings
+
+from ..http import nextcloud_httpx_client
+
+logger = logging.getLogger(__name__)
+
+# App password format regex (Nextcloud format: xxxxx-xxxxx-xxxxx-xxxxx-xxxxx)
+APP_PASSWORD_PATTERN = re.compile(
+    r"^[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}-[a-zA-Z0-9]{5}$"
+)
+
+# Timeout for Nextcloud API validation requests (seconds)
+NEXTCLOUD_VALIDATION_TIMEOUT = 10.0
+
+# Rate limiting configuration for app password provisioning
+# Limits: 5 attempts per user per hour
+RATE_LIMIT_MAX_ATTEMPTS = 5
+RATE_LIMIT_WINDOW_SECONDS = 3600  # 1 hour
+
+# In-memory rate limiter storage
+# Structure: {user_id: [(timestamp, success), ...]}
+_rate_limit_attempts: dict[str, list[tuple[float, bool]]] = defaultdict(list)
+
+
+def _check_rate_limit(user_id: str) -> tuple[bool, int]:
+    """Check if user is rate limited for app password operations.
+
+    Implements a sliding window rate limiter to prevent brute-force attacks
+    on the app password provisioning endpoint.
+
+    Args:
+        user_id: User identifier to check
+
+    Returns:
+        Tuple of (is_allowed, seconds_until_retry)
+        - is_allowed: True if request should be allowed
+        - seconds_until_retry: Seconds to wait if rate limited (0 if allowed)
+    """
+    current_time = time.time()
+    window_start = current_time - RATE_LIMIT_WINDOW_SECONDS
+
+    # Clean up old attempts outside the window
+    _rate_limit_attempts[user_id] = [
+        (ts, success)
+        for ts, success in _rate_limit_attempts[user_id]
+        if ts > window_start
+    ]
+
+    # Count recent attempts (both successful and failed)
+    recent_attempts = len(_rate_limit_attempts[user_id])
+
+    if recent_attempts >= RATE_LIMIT_MAX_ATTEMPTS:
+        # Find when the oldest attempt in the window will expire
+        oldest_attempt = min(ts for ts, _ in _rate_limit_attempts[user_id])
+        seconds_until_retry = int(
+            oldest_attempt + RATE_LIMIT_WINDOW_SECONDS - current_time
+        )
+        return False, max(1, seconds_until_retry)
+
+    return True, 0
+
+
+def _record_rate_limit_attempt(user_id: str, success: bool) -> None:
+    """Record an app password provisioning attempt for rate limiting.
+
+    Args:
+        user_id: User identifier
+        success: Whether the attempt was successful
+    """
+    _rate_limit_attempts[user_id].append((time.time(), success))
+
+
+def _extract_basic_auth(
+    request: Request, path_user_id: str
+) -> tuple[str, str, JSONResponse | None]:
+    """Extract and validate BasicAuth credentials from request.
+
+    Validates:
+    1. Authorization header is present and valid BasicAuth format
+    2. Username in credentials matches the path user_id
+
+    Args:
+        request: Starlette request with Authorization header
+        path_user_id: User ID from the URL path to verify against
+
+    Returns:
+        Tuple of (username, password, error_response)
+        - If successful: (username, password, None)
+        - If failed: ("", "", JSONResponse with error)
+    """
+    auth_header = request.headers.get("Authorization")
+
+    if not auth_header or not auth_header.startswith("Basic "):
+        return (
+            "",
+            "",
+            JSONResponse(
+                {"success": False, "error": "Missing BasicAuth credentials"},
+                status_code=401,
+            ),
+        )
+
+    try:
+        # Decode BasicAuth
+        encoded = auth_header.split(" ", 1)[1]
+        decoded = base64.b64decode(encoded).decode("utf-8")
+        username, password = decoded.split(":", 1)
+    except Exception:
+        return (
+            "",
+            "",
+            JSONResponse(
+                {"success": False, "error": "Invalid BasicAuth format"},
+                status_code=401,
+            ),
+        )
+
+    # Verify username matches path user_id
+    if username != path_user_id:
+        logger.warning(
+            f"Username mismatch in app password operation for path user {path_user_id}"
+        )
+        return (
+            "",
+            "",
+            JSONResponse(
+                {"success": False, "error": "Username does not match path user_id"},
+                status_code=403,
+            ),
+        )
+
+    return username, password, None
+
+
+async def _get_app_password_storage(request: Request) -> RefreshTokenStorage:
+    """Get or initialize RefreshTokenStorage for app password operations.
+
+    Checks app.state.storage first, then falls back to creating from environment.
+    This helper avoids repeated storage initialization logic across endpoints.
+
+    Args:
+        request: Starlette request with app state
+
+    Returns:
+        Initialized RefreshTokenStorage instance
+    """
+    storage = getattr(request.app.state, "storage", None)
+
+    if not storage:
+        # Multi-user BasicAuth mode may not have oauth_context
+        # Initialize storage from environment
+        storage = RefreshTokenStorage.from_env()
+        await storage.initialize()
+
+    return storage
+
+
+async def provision_app_password(request: Request) -> JSONResponse:
+    """POST /api/v1/users/{user_id}/app-password - Store app password for background sync.
+
+    This endpoint is used by Astrolabe (Nextcloud PHP app) to provision app passwords
+    for multi-user BasicAuth mode background sync.
+
+    The request must include BasicAuth credentials where:
+    - username: Nextcloud user ID (must match path user_id)
+    - password: The app password being provisioned
+
+    The MCP server validates the app password against Nextcloud before storing it.
+    This proves the user owns the password and has access to Nextcloud.
+
+    Security model:
+    - User identity is verified via BasicAuth against Nextcloud
+    - App password is encrypted before storage
+    - Only the user who owns the password can provision it
+    - Rate limited to prevent brute-force attacks
+    """
+    # Get user_id from path
+    path_user_id = request.path_params.get("user_id")
+    if not path_user_id:
+        return JSONResponse(
+            {"success": False, "error": "Missing user_id in path"},
+            status_code=400,
+        )
+
+    # Check rate limit before processing
+    is_allowed, retry_after = _check_rate_limit(path_user_id)
+    if not is_allowed:
+        logger.warning(
+            f"Rate limit exceeded for app password provisioning: {path_user_id}"
+        )
+        return JSONResponse(
+            {
+                "success": False,
+                "error": f"Rate limit exceeded. Try again in {retry_after} seconds.",
+            },
+            status_code=429,
+            headers={"Retry-After": str(retry_after)},
+        )
+
+    # Extract and validate BasicAuth credentials
+    username, app_password, error_response = _extract_basic_auth(request, path_user_id)
+    if error_response is not None:
+        _record_rate_limit_attempt(path_user_id, success=False)
+        return error_response
+
+    # Validate app password format
+    if not APP_PASSWORD_PATTERN.match(app_password):
+        _record_rate_limit_attempt(path_user_id, success=False)
+        return JSONResponse(
+            {"success": False, "error": "Invalid app password format"},
+            status_code=400,
+        )
+
+    # Get Nextcloud host from settings
+    settings = get_settings()
+    nextcloud_host = settings.nextcloud_host
+
+    if not nextcloud_host:
+        logger.error("NEXTCLOUD_HOST not configured")
+        return JSONResponse(
+            {"success": False, "error": "Server not configured"},
+            status_code=500,
+        )
+
+    # Validate app password against Nextcloud
+    try:
+        async with nextcloud_httpx_client(
+            timeout=NEXTCLOUD_VALIDATION_TIMEOUT
+        ) as client:
+            # Use OCS API to verify credentials
+            test_url = f"{nextcloud_host}/ocs/v1.php/cloud/user"
+            response = await client.get(
+                test_url,
+                auth=(username, app_password),
+                params={"format": "json"},
+                headers={"OCS-APIRequest": "true"},
+            )
+
+            if response.status_code != 200:
+                logger.warning(
+                    f"App password validation failed for user: HTTP {response.status_code}"
+                )
+                _record_rate_limit_attempt(path_user_id, success=False)
+                return JSONResponse(
+                    {"success": False, "error": "Invalid app password"},
+                    status_code=401,
+                )
+
+            # Verify the user ID from response matches
+            data = response.json()
+            ocs_user_id = data.get("ocs", {}).get("data", {}).get("id")
+            if ocs_user_id != username:
+                logger.warning("User ID mismatch in OCS response")
+                _record_rate_limit_attempt(path_user_id, success=False)
+                return JSONResponse(
+                    {"success": False, "error": "User ID mismatch"},
+                    status_code=403,
+                )
+
+    except httpx.RequestError as e:
+        logger.error(f"Failed to validate app password: {e}")
+        return JSONResponse(
+            {"success": False, "error": "Failed to validate credentials"},
+            status_code=500,
+        )
+
+    # Parse optional scopes and username from request body
+    scopes = None
+    nc_username = None
+    try:
+        body = await request.json()
+        scopes = body.get("scopes")  # list[str] | None
+        nc_username = body.get("username")  # Nextcloud loginName
+    except Exception:
+        pass  # No JSON body = legacy call without scopes
+
+    # Store the validated app password
+    try:
+        storage = await _get_app_password_storage(request)
+
+        await storage.store_app_password_with_scopes(
+            username, app_password, scopes=scopes, username=nc_username
+        )
+
+        _record_rate_limit_attempt(path_user_id, success=True)
+        logger.info(f"Provisioned app password for user: {username}")
+
+        return JSONResponse(
+            {
+                "success": True,
+                "message": f"App password stored for {username}",
+                "scopes": scopes,
+            }
+        )
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "provision_app_password")
+        return JSONResponse(
+            {"success": False, "error": error_msg},
+            status_code=500,
+        )
+
+
+async def get_app_password_status(request: Request) -> JSONResponse:
+    """GET /api/v1/users/{user_id}/app-password - Check if user has provisioned app password.
+
+    Returns status of background sync access for multi-user BasicAuth mode.
+
+    Requires BasicAuth with the user's app password for authentication.
+    """
+    # Get user_id from path
+    path_user_id = request.path_params.get("user_id")
+    if not path_user_id:
+        return JSONResponse(
+            {"success": False, "error": "Missing user_id in path"},
+            status_code=400,
+        )
+
+    # Extract and validate BasicAuth credentials
+    username, _, error_response = _extract_basic_auth(request, path_user_id)
+    if error_response is not None:
+        return error_response
+
+    try:
+        storage = await _get_app_password_storage(request)
+        app_password = await storage.get_app_password(username)
+
+        return JSONResponse(
+            {
+                "success": True,
+                "user_id": username,
+                "has_app_password": app_password is not None,
+            }
+        )
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "get_app_password_status")
+        return JSONResponse(
+            {"success": False, "error": error_msg},
+            status_code=500,
+        )
+
+
+async def delete_app_password(request: Request) -> JSONResponse:
+    """DELETE /api/v1/users/{user_id}/app-password - Delete stored app password.
+
+    Removes the user's app password from MCP server storage.
+
+    Requires BasicAuth with the user's credentials.
+    """
+    # Get user_id from path
+    path_user_id = request.path_params.get("user_id")
+    if not path_user_id:
+        return JSONResponse(
+            {"success": False, "error": "Missing user_id in path"},
+            status_code=400,
+        )
+
+    # Extract and validate BasicAuth credentials
+    username, password, error_response = _extract_basic_auth(request, path_user_id)
+    if error_response is not None:
+        return error_response
+
+    # Validate credentials against Nextcloud
+    settings = get_settings()
+    nextcloud_host = settings.nextcloud_host
+
+    try:
+        async with nextcloud_httpx_client(
+            timeout=NEXTCLOUD_VALIDATION_TIMEOUT
+        ) as client:
+            test_url = f"{nextcloud_host}/ocs/v1.php/cloud/user"
+            response = await client.get(
+                test_url,
+                auth=(username, password),
+                params={"format": "json"},
+                headers={"OCS-APIRequest": "true"},
+            )
+
+            if response.status_code != 200:
+                return JSONResponse(
+                    {"success": False, "error": "Invalid credentials"},
+                    status_code=401,
+                )
+    except httpx.RequestError as e:
+        logger.error(f"Failed to validate credentials: {e}")
+        return JSONResponse(
+            {"success": False, "error": "Failed to validate credentials"},
+            status_code=500,
+        )
+
+    try:
+        storage = await _get_app_password_storage(request)
+        deleted = await storage.delete_app_password(username)
+
+        if deleted:
+            logger.info(f"Deleted app password for user: {username}")
+            return JSONResponse(
+                {
+                    "success": True,
+                    "message": f"App password deleted for {username}",
+                }
+            )
+        else:
+            return JSONResponse(
+                {
+                    "success": True,
+                    "message": "No app password found to delete",
+                }
+            )
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "delete_app_password")
+        return JSONResponse(
+            {"success": False, "error": error_msg},
+            status_code=500,
+        )
@@ -0,0 +1,776 @@
+"""Visualization API endpoints for search and PDF preview.
+
+ADR-018: Provides REST API endpoints for the Nextcloud PHP app (Astrolabe) to:
+- Execute unified search with semantic/BM25/hybrid algorithms
+- Execute vector search with PCA visualization coordinates
+- Fetch chunk context with surrounding text
+- Render PDF pages server-side (avoiding CSP/worker issues)
+
+All endpoints require OAuth bearer token authentication via UnifiedTokenVerifier.
+"""
+
+import base64
+import logging
+from typing import Any
+
+import pymupdf
+from qdrant_client.models import FieldCondition, Filter, MatchValue
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+from nextcloud_mcp_server.api.management import (
+    _parse_float_param,
+    _parse_int_param,
+    _sanitize_error_for_client,
+    _validate_query_string,
+    extract_bearer_token,
+    validate_token_and_get_user,
+)
+from nextcloud_mcp_server.client import NextcloudClient
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.embedding.service import get_embedding_service
+from nextcloud_mcp_server.search import (
+    BM25HybridSearchAlgorithm,
+    SemanticSearchAlgorithm,
+)
+from nextcloud_mcp_server.search.context import get_chunk_with_context
+from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+from nextcloud_mcp_server.vector.visualization import compute_pca_coordinates
+
+logger = logging.getLogger(__name__)
+
+
+async def unified_search(request: Request) -> JSONResponse:
+    """POST /api/v1/search - Search endpoint for Nextcloud Unified Search.
+
+    Optimized search endpoint for the Nextcloud Unified Search provider
+    and other PHP app integrations. Returns results with metadata needed
+    for navigation to source documents.
+
+    Request body:
+    {
+        "query": "search query",
+        "algorithm": "semantic|bm25|hybrid",  // default: hybrid
+        "limit": 20,  // max: 100
+        "offset": 0,  // pagination offset
+        "include_pca": false,  // optional PCA coordinates
+        "include_chunks": true  // include text snippets
+    }
+
+    Response:
+    {
+        "results": [{
+            "id": "doc123",
+            "doc_type": "note",
+            "title": "Document Title",
+            "excerpt": "Matching text snippet...",
+            "score": 0.85,
+            "path": "/path/to/file.txt",  // for files
+            "board_id": 1,  // for deck cards
+            "card_id": 42
+        }],
+        "total_found": 150,
+        "algorithm_used": "hybrid"
+    }
+
+    Requires OAuth bearer token for user filtering.
+    """
+    settings = get_settings()
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"error": "Vector sync is disabled on this server"},
+            status_code=404,
+        )
+
+    # Validate OAuth token and extract user
+    try:
+        user_id, _validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/search: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "unified_search"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Parse request body
+        body = await request.json()
+
+        # Validate and parse parameters
+        try:
+            query = body.get("query", "")
+            _validate_query_string(query, max_length=10000)
+
+            limit = _parse_int_param(
+                str(body.get("limit")) if body.get("limit") is not None else None,
+                20,
+                1,
+                100,
+                "limit",
+            )
+
+            offset = _parse_int_param(
+                str(body.get("offset")) if body.get("offset") is not None else None,
+                0,
+                0,
+                1000000,
+                "offset",
+            )
+
+            score_threshold = _parse_float_param(
+                body.get("score_threshold"),
+                0.0,
+                0.0,
+                1.0,
+                "score_threshold",
+            )
+        except ValueError as e:
+            return JSONResponse({"error": str(e)}, status_code=400)
+
+        algorithm = body.get("algorithm", "hybrid")
+        fusion = body.get("fusion", "rrf")
+        include_pca = body.get("include_pca", False)
+        include_chunks = body.get("include_chunks", True)
+        doc_types = body.get("doc_types")  # Optional filter
+
+        if not query:
+            return JSONResponse({"results": [], "total_found": 0})
+
+        # Validate algorithm
+        valid_algorithms = {"semantic", "bm25", "hybrid"}
+        if algorithm not in valid_algorithms:
+            algorithm = "hybrid"
+
+        # Validate fusion method
+        valid_fusions = {"rrf", "dbsf"}
+        if fusion not in valid_fusions:
+            fusion = "rrf"
+
+        # Select search algorithm
+        if algorithm == "semantic":
+            search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+        else:
+            search_algo = BM25HybridSearchAlgorithm(
+                score_threshold=score_threshold, fusion=fusion
+            )
+
+        # Request extra results to handle offset
+        search_limit = limit + offset
+
+        # Execute search
+        all_results = []
+        if doc_types and isinstance(doc_types, list):
+            for doc_type in doc_types:
+                if doc_type:
+                    results = await search_algo.search(
+                        query=query,
+                        user_id=user_id,
+                        limit=search_limit,
+                        doc_type=doc_type,
+                    )
+                    all_results.extend(results)
+            all_results.sort(key=lambda r: r.score, reverse=True)
+        else:
+            all_results = await search_algo.search(
+                query=query,
+                user_id=user_id,
+                limit=search_limit,
+            )
+
+        # Sort results by score (no deduplication - show all chunks)
+        sorted_results = sorted(all_results, key=lambda r: r.score, reverse=True)
+
+        # Calculate total and apply pagination
+        total_found = len(sorted_results)
+        paginated_results = sorted_results[offset : offset + limit]
+
+        # Format results for Unified Search
+        formatted_results = []
+        for result in paginated_results:
+            # Get document ID (prefer note_id for notes)
+            doc_id = result.id
+            if result.metadata and "note_id" in result.metadata:
+                doc_id = result.metadata["note_id"]
+
+            result_data: dict[str, Any] = {
+                "id": doc_id,
+                "doc_type": result.doc_type,
+                "title": result.title,
+                "score": result.score,
+            }
+
+            # Include excerpt/chunk if requested (full content, no truncation)
+            if include_chunks and result.excerpt:
+                result_data["excerpt"] = result.excerpt
+
+            # Include navigation metadata from result.metadata
+            if result.metadata:
+                # File path and mimetype for files
+                if "path" in result.metadata:
+                    result_data["path"] = result.metadata["path"]
+                if "mime_type" in result.metadata:
+                    result_data["mime_type"] = result.metadata["mime_type"]
+
+                # Deck card navigation
+                if "board_id" in result.metadata:
+                    result_data["board_id"] = result.metadata["board_id"]
+                if "card_id" in result.metadata:
+                    result_data["card_id"] = result.metadata["card_id"]
+
+                # Calendar event metadata
+                if "calendar_id" in result.metadata:
+                    result_data["calendar_id"] = result.metadata["calendar_id"]
+                if "event_uid" in result.metadata:
+                    result_data["event_uid"] = result.metadata["event_uid"]
+
+            # Add PDF page metadata
+            if result.page_number is not None:
+                result_data["page_number"] = result.page_number
+            if result.page_count is not None:
+                result_data["page_count"] = result.page_count
+
+            # Add chunk metadata (always present, defaults to 0 and 1)
+            result_data["chunk_index"] = result.chunk_index
+            result_data["total_chunks"] = result.total_chunks
+
+            # Add chunk offsets for modal navigation
+            if result.chunk_start_offset is not None:
+                result_data["chunk_start_offset"] = result.chunk_start_offset
+            if result.chunk_end_offset is not None:
+                result_data["chunk_end_offset"] = result.chunk_end_offset
+
+            formatted_results.append(result_data)
+
+        response_data: dict[str, Any] = {
+            "results": formatted_results,
+            "total_found": total_found,
+            "algorithm_used": algorithm,
+        }
+
+        # Optional PCA coordinates
+        if include_pca and len(paginated_results) >= 2:
+            try:
+                if search_algo.query_embedding is not None:
+                    query_embedding = search_algo.query_embedding
+                else:
+                    embedding_service = get_embedding_service()
+                    query_embedding = await embedding_service.embed(query)
+
+                pca_data = await compute_pca_coordinates(
+                    paginated_results, query_embedding
+                )
+                response_data["pca_data"] = pca_data
+            except Exception as e:
+                logger.warning(f"Failed to compute PCA for unified search: {e}")
+
+        return JSONResponse(response_data)
+
+    except Exception as e:
+        logger.error(f"Error in unified search: {e}")
+        return JSONResponse(
+            {
+                "error": "Internal error",
+                "message": _sanitize_error_for_client(e, "unified_search"),
+            },
+            status_code=500,
+        )
+
+
+async def vector_search(request: Request) -> JSONResponse:
+    """POST /api/v1/vector-viz/search - Vector search for visualization.
+
+    Executes semantic search and returns results with optional PCA coordinates
+    for 2D visualization.
+
+    Request body:
+    {
+        "query": "search query",
+        "algorithm": "semantic|bm25|hybrid",  // default: hybrid
+        "limit": 10,  // max: 50
+        "include_pca": true,  // whether to include 2D coordinates
+        "doc_types": ["note", "file"]  // optional filter by document types
+    }
+
+    Requires OAuth bearer token for user filtering.
+    """
+    settings = get_settings()
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"error": "Vector sync is disabled on this server"},
+            status_code=404,
+        )
+
+    # Validate OAuth token and extract user
+    try:
+        user_id, _validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/vector-viz/search: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "vector_search"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Parse request body
+        body = await request.json()
+        query = body.get("query", "")
+        algorithm = body.get("algorithm", "hybrid")
+        fusion = body.get("fusion", "rrf")
+        score_threshold = body.get("score_threshold", 0.0)
+        limit = min(body.get("limit", 10), 50)  # Enforce max limit
+        include_pca = body.get("include_pca", True)
+        doc_types = body.get("doc_types")  # Optional list of document types
+
+        if not query:
+            return JSONResponse(
+                {"error": "Missing required parameter: query"},
+                status_code=400,
+            )
+
+        # Validate algorithm
+        valid_algorithms = {"semantic", "bm25", "hybrid"}
+        if algorithm not in valid_algorithms:
+            algorithm = "hybrid"
+
+        # Validate fusion method
+        valid_fusions = {"rrf", "dbsf"}
+        if fusion not in valid_fusions:
+            fusion = "rrf"
+
+        # Select search algorithm
+        if algorithm == "semantic":
+            search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+        else:
+            # Both "hybrid" and "bm25" use the BM25HybridSearchAlgorithm
+            # which combines dense semantic and sparse BM25 vectors
+            search_algo = BM25HybridSearchAlgorithm(
+                score_threshold=score_threshold, fusion=fusion
+            )
+
+        # Execute search for each doc_type if specified, otherwise search all
+        all_results = []
+        if doc_types and isinstance(doc_types, list):
+            # Search each doc_type separately and merge results
+            for doc_type in doc_types:
+                if doc_type:  # Skip empty strings
+                    results = await search_algo.search(
+                        query=query,
+                        user_id=user_id,
+                        limit=limit,
+                        doc_type=doc_type,
+                    )
+                    all_results.extend(results)
+            # Sort merged results by score and limit
+            all_results.sort(key=lambda r: r.score, reverse=True)
+            all_results = all_results[:limit]
+        else:
+            # Search all document types
+            all_results = await search_algo.search(
+                query=query,
+                user_id=user_id,
+                limit=limit,
+            )
+
+        # Format results for PHP client
+        formatted_results = []
+        for result in all_results:
+            formatted_result = {
+                "id": result.id,
+                "doc_type": result.doc_type,
+                "title": result.title,
+                "excerpt": result.excerpt[:200] if result.excerpt else "",
+                "score": result.score,
+                "metadata": result.metadata,
+                # Chunk information for context display
+                "chunk_index": result.chunk_index,
+                "total_chunks": result.total_chunks,
+            }
+            # Include optional fields if present
+            if result.chunk_start_offset is not None:
+                formatted_result["chunk_start_offset"] = result.chunk_start_offset
+            if result.chunk_end_offset is not None:
+                formatted_result["chunk_end_offset"] = result.chunk_end_offset
+            if result.page_number is not None:
+                formatted_result["page_number"] = result.page_number
+            if result.page_count is not None:
+                formatted_result["page_count"] = result.page_count
+            formatted_results.append(formatted_result)
+
+        response_data: dict[str, Any] = {
+            "results": formatted_results,
+            "algorithm_used": algorithm,
+            "total_documents": len(formatted_results),
+        }
+
+        # Compute PCA coordinates for visualization using shared function
+        if include_pca and len(all_results) >= 2:
+            try:
+                # Get query embedding from search algorithm or generate it
+                if search_algo.query_embedding is not None:
+                    query_embedding = search_algo.query_embedding
+                else:
+                    embedding_service = get_embedding_service()
+                    query_embedding = await embedding_service.embed(query)
+
+                pca_data = await compute_pca_coordinates(all_results, query_embedding)
+                response_data["coordinates_3d"] = pca_data["coordinates_3d"]
+                response_data["query_coords"] = pca_data["query_coords"]
+                if "pca_variance" in pca_data:
+                    response_data["pca_variance"] = pca_data["pca_variance"]
+            except Exception as e:
+                logger.warning(f"Failed to compute PCA coordinates: {e}")
+                response_data["coordinates_3d"] = []
+                response_data["query_coords"] = []
+        elif include_pca:
+            # Not enough results for PCA
+            response_data["coordinates_3d"] = []
+            response_data["query_coords"] = []
+
+        return JSONResponse(response_data)
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "vector_search")
+        return JSONResponse(
+            {"error": error_msg},
+            status_code=500,
+        )
+
+
+async def get_chunk_context(request: Request) -> JSONResponse:
+    """GET /api/v1/chunk-context - Fetch chunk text with context.
+
+    Retrieves the matched chunk along with surrounding text and metadata.
+    Used by clients to display chunk context and highlighted PDFs.
+
+    Query parameters:
+        doc_type: Document type (e.g., "note")
+        doc_id: Document ID
+        start: Chunk start offset (character position)
+        end: Chunk end offset (character position)
+        context: Characters of context before/after (default: 500)
+
+    Requires OAuth bearer token for authentication.
+    """
+    try:
+        # Validate OAuth token and extract user
+        user_id, validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/chunk-context: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "get_chunk_context"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Get query parameters
+        doc_type = request.query_params.get("doc_type")
+        doc_id = request.query_params.get("doc_id")
+        start_str = request.query_params.get("start")
+        end_str = request.query_params.get("end")
+
+        # Validate required parameters
+        if not all([doc_type, doc_id, start_str, end_str]):
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": "Missing required parameters: doc_type, doc_id, start, end",
+                },
+                status_code=400,
+            )
+
+        # Type narrowing: we already checked these are not None above
+        assert start_str is not None
+        assert end_str is not None
+        assert doc_id is not None
+        assert doc_type is not None
+
+        # Parse and validate integer parameters with bounds checking
+        try:
+            context_chars = _parse_int_param(
+                request.query_params.get("context"),
+                500,
+                0,
+                10000,
+                "context_chars",
+            )
+            start = _parse_int_param(start_str, 0, 0, 10000000, "start")
+            end = _parse_int_param(end_str, 0, 0, 10000000, "end")
+            if end <= start:
+                raise ValueError("end must be greater than start")
+        except ValueError as e:
+            return JSONResponse({"success": False, "error": str(e)}, status_code=400)
+        # Convert doc_id to int if possible (most IDs are int)
+        doc_id_val: str | int = int(doc_id) if doc_id.isdigit() else doc_id
+
+        # Get bearer token for client initialization
+        token = extract_bearer_token(request)
+        if not token:
+            raise ValueError("Missing token")
+
+        # Get Nextcloud host from OAuth context
+        oauth_ctx = request.app.state.oauth_context
+        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+        if not nextcloud_host:
+            raise ValueError("Nextcloud host not configured")
+
+        # Initialize authenticated Nextcloud client
+        async with NextcloudClient.from_token(
+            base_url=nextcloud_host, token=token, username=user_id
+        ) as nc_client:
+            chunk_context = await get_chunk_with_context(
+                nc_client=nc_client,
+                user_id=user_id,
+                doc_id=doc_id_val,
+                doc_type=doc_type,
+                chunk_start=start,
+                chunk_end=end,
+                context_chars=context_chars,
+            )
+
+        if chunk_context is None:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": f"Failed to fetch chunk context for {doc_type} {doc_id}",
+                },
+                status_code=404,
+            )
+
+        # For PDF files, also fetch the highlighted page image from Qdrant if available
+        # This is useful for clients that want to show a pre-rendered image
+        highlighted_page_image = None
+        page_number = chunk_context.page_number
+
+        if doc_type == "file":
+            try:
+                settings = get_settings()
+                qdrant_client = await get_qdrant_client()
+
+                # Query for this specific chunk's highlighted image
+                points_response = await qdrant_client.scroll(
+                    collection_name=settings.get_collection_name(),
+                    scroll_filter=Filter(
+                        must=[
+                            get_placeholder_filter(),
+                            FieldCondition(
+                                key="doc_id", match=MatchValue(value=doc_id_val)
+                            ),
+                            FieldCondition(
+                                key="user_id", match=MatchValue(value=user_id)
+                            ),
+                            FieldCondition(
+                                key="chunk_start_offset", match=MatchValue(value=start)
+                            ),
+                            FieldCondition(
+                                key="chunk_end_offset", match=MatchValue(value=end)
+                            ),
+                        ]
+                    ),
+                    limit=1,
+                    with_vectors=False,
+                    with_payload=["highlighted_page_image", "page_number"],
+                )
+
+                if points_response[0]:
+                    payload = points_response[0][0].payload
+                    if payload:
+                        highlighted_page_image = payload.get("highlighted_page_image")
+                        # Trust Qdrant page number if available (might be more accurate than context expansion logic)
+                        if payload.get("page_number") is not None:
+                            page_number = payload.get("page_number")
+
+            except Exception as e:
+                logger.warning(f"Failed to fetch highlighted image: {e}")
+
+        # Build response
+        response_data = {
+            "success": True,
+            "chunk_text": chunk_context.chunk_text,
+            "before_context": chunk_context.before_context,
+            "after_context": chunk_context.after_context,
+            "has_more_before": chunk_context.has_before_truncation,
+            "has_more_after": chunk_context.has_after_truncation,
+            "page_number": page_number,
+            "chunk_index": chunk_context.chunk_index,
+            "total_chunks": chunk_context.total_chunks,
+        }
+
+        if highlighted_page_image:
+            response_data["highlighted_page_image"] = highlighted_page_image
+
+        return JSONResponse(response_data)
+
+    except Exception as e:
+        error_msg = _sanitize_error_for_client(e, "get_chunk_context")
+        return JSONResponse(
+            {"error": error_msg},
+            status_code=500,
+        )
+
+
+async def get_pdf_preview(request: Request) -> JSONResponse:
+    """GET /api/v1/pdf-preview - Render PDF page to PNG image.
+
+    Server-side PDF rendering using PyMuPDF. This endpoint allows Astrolabe
+    to display PDF pages without requiring client-side PDF.js, avoiding CSP
+    worker restrictions and ES private field issues in Chromium.
+
+    Query parameters:
+        file_path: WebDAV path to PDF file (e.g., "/Documents/report.pdf")
+        page: Page number (1-indexed, default: 1)
+        scale: Zoom factor for rendering (default: 2.0 = 144 DPI)
+
+    Returns:
+        {
+            "success": true,
+            "image": "<base64-encoded-png>",
+            "page_number": 1,
+            "total_pages": 10
+        }
+
+    Requires OAuth bearer token for authentication.
+    """
+    # Log incoming request
+    file_path_param = request.query_params.get("file_path", "<not provided>")
+    page_param = request.query_params.get("page", "1")
+    logger.info(f"PDF preview request: file_path={file_path_param}, page={page_param}")
+
+    try:
+        # Validate OAuth token and extract user
+        user_id, validated = await validate_token_and_get_user(request)
+        logger.info(f"PDF preview authenticated for user: {user_id}")
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/pdf-preview: {e}")
+        return JSONResponse(
+            {
+                "success": False,
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "get_pdf_preview"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Parse and validate parameters
+        file_path = request.query_params.get("file_path")
+        if not file_path:
+            return JSONResponse(
+                {"success": False, "error": "Missing required parameter: file_path"},
+                status_code=400,
+            )
+
+        # Validate no path traversal sequences
+        if ".." in file_path:
+            return JSONResponse(
+                {"success": False, "error": "Invalid file path"},
+                status_code=400,
+            )
+
+        try:
+            page_num = _parse_int_param(
+                request.query_params.get("page"), 1, 1, 10000, "page"
+            )
+            scale = _parse_float_param(
+                request.query_params.get("scale"), 2.0, 0.5, 5.0, "scale"
+            )
+        except ValueError as e:
+            return JSONResponse({"success": False, "error": str(e)}, status_code=400)
+
+        # Get bearer token for WebDAV authentication
+        token = extract_bearer_token(request)
+        if not token:
+            raise ValueError("Missing token")
+
+        # Get Nextcloud host from OAuth context
+        oauth_ctx = request.app.state.oauth_context
+        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+        if not nextcloud_host:
+            raise ValueError("Nextcloud host not configured")
+
+        # Download PDF via WebDAV using user's token
+        async with NextcloudClient.from_token(
+            base_url=nextcloud_host, token=token, username=user_id
+        ) as nc_client:
+            pdf_bytes, _ = await nc_client.webdav.read_file(file_path)
+
+        # Check file size limit (50 MB)
+        max_pdf_size = 50 * 1024 * 1024
+        if len(pdf_bytes) > max_pdf_size:
+            return JSONResponse(
+                {
+                    "success": False,
+                    "error": f"PDF file exceeds maximum size limit ({max_pdf_size // (1024 * 1024)} MB)",
+                },
+                status_code=413,
+            )
+
+        # Render page with PyMuPDF
+        doc = pymupdf.open(stream=pdf_bytes, filetype="pdf")
+        try:
+            total_pages = doc.page_count
+
+            # Validate page number
+            if page_num > total_pages:
+                return JSONResponse(
+                    {
+                        "success": False,
+                        "error": f"Page {page_num} does not exist (document has {total_pages} pages)",
+                    },
+                    status_code=400,
+                )
+
+            page = doc[page_num - 1]  # 0-indexed
+            mat = pymupdf.Matrix(scale, scale)
+            pix = page.get_pixmap(matrix=mat, alpha=False)
+            png_bytes = pix.tobytes("png")
+        finally:
+            doc.close()
+
+        # Encode as base64
+        image_b64 = base64.b64encode(png_bytes).decode("ascii")
+
+        logger.info(
+            f"Rendered PDF preview: {file_path} page {page_num}/{total_pages}, "
+            f"{len(png_bytes):,} bytes"
+        )
+
+        return JSONResponse(
+            {
+                "success": True,
+                "image": image_b64,
+                "page_number": page_num,
+                "total_pages": total_pages,
+            }
+        )
+
+    except FileNotFoundError:
+        logger.warning(f"PDF file not found: {file_path_param}")
+        return JSONResponse(
+            {"success": False, "error": "PDF file not found"},
+            status_code=404,
+        )
+    except (pymupdf.FileDataError, pymupdf.EmptyFileError):
+        logger.warning(f"Invalid or corrupted PDF file: {file_path_param}")
+        return JSONResponse(
+            {"success": False, "error": "Invalid or corrupted PDF file"},
+            status_code=400,
+        )
+    except Exception as e:
+        logger.error(f"PDF preview error: {e}", exc_info=True)
+        error_msg = _sanitize_error_for_client(e, "get_pdf_preview")
+        return JSONResponse(
+            {"success": False, "error": error_msg},
+            status_code=500,
+        )
@@ -0,0 +1,304 @@
+"""Webhook management API endpoints.
+
+Provides REST API endpoints for managing webhook registrations with Nextcloud.
+These endpoints are used by the Nextcloud PHP app (Astrolabe) to:
+- List installed Nextcloud apps
+- Create, list, and delete webhook registrations
+
+All endpoints require OAuth bearer token authentication via UnifiedTokenVerifier.
+"""
+
+import logging
+
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+
+from nextcloud_mcp_server.api.management import (
+    _sanitize_error_for_client,
+    extract_bearer_token,
+    validate_token_and_get_user,
+)
+from nextcloud_mcp_server.client.webhooks import WebhooksClient
+
+from ..http import nextcloud_httpx_client
+
+logger = logging.getLogger(__name__)
+
+
+async def get_installed_apps(request: Request) -> JSONResponse:
+    """GET /api/v1/apps - Get list of installed Nextcloud apps.
+
+    Returns a list of installed app IDs for filtering webhook presets.
+
+    Requires OAuth bearer token for authentication.
+    """
+    try:
+        # Validate OAuth token and extract user
+        user_id, validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/apps: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "get_installed_apps"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Get Bearer token from request
+        token = extract_bearer_token(request)
+        if not token:
+            raise ValueError("Missing Authorization header")
+
+        # Get Nextcloud host from OAuth context
+        oauth_ctx = request.app.state.oauth_context
+        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+        if not nextcloud_host:
+            raise ValueError("Nextcloud host not configured")
+
+        # Create authenticated HTTP client
+        async with nextcloud_httpx_client(
+            base_url=nextcloud_host,
+            headers={"Authorization": f"Bearer {token}"},
+            timeout=30.0,
+        ) as client:
+            # Get installed apps using OCS API
+            # Notes, Calendar, Deck, Tables, etc. are apps that support webhooks
+            # We check which ones are installed and enabled
+            ocs_url = "/ocs/v1.php/cloud/apps"
+            params = {"filter": "enabled"}
+
+            response = await client.get(
+                ocs_url,
+                params=params,
+                headers={"OCS-APIRequest": "true", "Accept": "application/json"},
+            )
+
+            if response.status_code != 200:
+                raise ValueError(f"OCS API returned status {response.status_code}")
+
+            data = response.json()
+            apps = data.get("ocs", {}).get("data", {}).get("apps", [])
+
+            return JSONResponse({"apps": apps})
+
+    except Exception as e:
+        logger.error(f"Error getting installed apps for user {user_id}: {e}")
+        return JSONResponse(
+            {
+                "error": "Internal error",
+                "message": _sanitize_error_for_client(e, "get_installed_apps"),
+            },
+            status_code=500,
+        )
+
+
+async def list_webhooks(request: Request) -> JSONResponse:
+    """GET /api/v1/webhooks - List all registered webhooks.
+
+    Returns list of webhook registrations for the authenticated user.
+
+    Requires OAuth bearer token for authentication.
+    """
+    try:
+        # Validate OAuth token and extract user
+        user_id, validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "list_webhooks"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Get Bearer token from request
+        token = extract_bearer_token(request)
+        if not token:
+            raise ValueError("Missing Authorization header")
+
+        # Get Nextcloud host from OAuth context
+        oauth_ctx = request.app.state.oauth_context
+        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+        if not nextcloud_host:
+            raise ValueError("Nextcloud host not configured")
+
+        # Create authenticated HTTP client
+        async with nextcloud_httpx_client(
+            base_url=nextcloud_host,
+            headers={"Authorization": f"Bearer {token}"},
+            timeout=30.0,
+        ) as client:
+            # Use WebhooksClient to list webhooks
+            webhooks_client = WebhooksClient(client, user_id)
+            webhooks = await webhooks_client.list_webhooks()
+
+            return JSONResponse({"webhooks": webhooks})
+
+    except Exception as e:
+        logger.error(f"Error listing webhooks for user {user_id}: {e}")
+        return JSONResponse(
+            {
+                "error": "Internal error",
+                "message": _sanitize_error_for_client(e, "list_webhooks"),
+            },
+            status_code=500,
+        )
+
+
+async def create_webhook(request: Request) -> JSONResponse:
+    """POST /api/v1/webhooks - Create a new webhook registration.
+
+    Request body:
+    {
+        "event": "OCP\\Files\\Events\\Node\\NodeCreatedEvent",
+        "uri": "http://mcp:8000/webhooks/nextcloud",
+        "eventFilter": {"event.node.path": "/^\\/.*\\/files\\/Notes\\//"}
+    }
+
+    Returns the created webhook data including the webhook ID.
+
+    Requires OAuth bearer token for authentication.
+    """
+    try:
+        # Validate OAuth token and extract user
+        user_id, validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "create_webhook"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Parse request body
+        body = await request.json()
+        event = body.get("event")
+        uri = body.get("uri")
+        # Accept both camelCase (eventFilter) and snake_case (event_filter)
+        event_filter = body.get("eventFilter") or body.get("event_filter")
+
+        if not event or not uri:
+            return JSONResponse(
+                {
+                    "error": "Bad request",
+                    "message": "Missing required fields: event, uri",
+                },
+                status_code=400,
+            )
+
+        # Get Bearer token from request
+        token = extract_bearer_token(request)
+        if not token:
+            raise ValueError("Missing Authorization header")
+
+        # Get Nextcloud host from OAuth context
+        oauth_ctx = request.app.state.oauth_context
+        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+        if not nextcloud_host:
+            raise ValueError("Nextcloud host not configured")
+
+        # Create authenticated HTTP client
+        async with nextcloud_httpx_client(
+            base_url=nextcloud_host,
+            headers={"Authorization": f"Bearer {token}"},
+            timeout=30.0,
+        ) as client:
+            # Use WebhooksClient to create webhook
+            webhooks_client = WebhooksClient(client, user_id)
+            webhook_data = await webhooks_client.create_webhook(
+                event=event, uri=uri, event_filter=event_filter
+            )
+
+            return JSONResponse({"webhook": webhook_data})
+
+    except Exception as e:
+        logger.error(f"Error creating webhook for user {user_id}: {e}")
+        return JSONResponse(
+            {
+                "error": "Internal error",
+                "message": _sanitize_error_for_client(e, "create_webhook"),
+            },
+            status_code=500,
+        )
+
+
+async def delete_webhook(request: Request) -> JSONResponse:
+    """DELETE /api/v1/webhooks/{webhook_id} - Delete a webhook registration.
+
+    Returns success/failure status.
+
+    Requires OAuth bearer token for authentication.
+    """
+    try:
+        # Validate OAuth token and extract user
+        user_id, validated = await validate_token_and_get_user(request)
+    except Exception as e:
+        logger.warning(f"Unauthorized access to /api/v1/webhooks: {e}")
+        return JSONResponse(
+            {
+                "error": "Unauthorized",
+                "message": _sanitize_error_for_client(e, "delete_webhook"),
+            },
+            status_code=401,
+        )
+
+    try:
+        # Get webhook_id from path parameter
+        webhook_id = request.path_params.get("webhook_id")
+        if not webhook_id:
+            return JSONResponse(
+                {"error": "Bad request", "message": "Missing webhook_id"},
+                status_code=400,
+            )
+
+        try:
+            webhook_id = int(webhook_id)
+        except ValueError:
+            return JSONResponse(
+                {"error": "Bad request", "message": "Invalid webhook_id"},
+                status_code=400,
+            )
+
+        # Get Bearer token from request
+        token = extract_bearer_token(request)
+        if not token:
+            raise ValueError("Missing Authorization header")
+
+        # Get Nextcloud host from OAuth context
+        oauth_ctx = request.app.state.oauth_context
+        nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
+
+        if not nextcloud_host:
+            raise ValueError("Nextcloud host not configured")
+
+        # Create authenticated HTTP client
+        async with nextcloud_httpx_client(
+            base_url=nextcloud_host,
+            headers={"Authorization": f"Bearer {token}"},
+            timeout=30.0,
+        ) as client:
+            # Use WebhooksClient to delete webhook
+            webhooks_client = WebhooksClient(client, user_id)
+            await webhooks_client.delete_webhook(webhook_id=webhook_id)
+
+            return JSONResponse({"success": True, "message": "Webhook deleted"})
+
+    except Exception as e:
+        logger.error(f"Error deleting webhook for user {user_id}: {e}")
+        return JSONResponse(
+            {
+                "error": "Internal error",
+                "message": _sanitize_error_for_client(e, "delete_webhook"),
+            },
+            status_code=500,
+        )
@@ -0,0 +1,152 @@
+"""
+Client for querying Astrolabe Management API for background sync credentials.
+
+This client uses OAuth client credentials flow to authenticate to Nextcloud
+and retrieve user app passwords for background sync operations.
+"""
+
+import logging
+import time
+from typing import Optional
+
+from ..http import nextcloud_httpx_client
+
+logger = logging.getLogger(__name__)
+
+
+class AstrolabeClient:
+    """Client for querying Astrolabe API for background sync credentials.
+
+    Uses OAuth client credentials flow to authenticate as the MCP server
+    and retrieve user app passwords that are stored in Nextcloud.
+    """
+
+    def __init__(
+        self,
+        nextcloud_host: str,
+        client_id: str,
+        client_secret: str,
+    ):
+        """
+        Initialize Astrolabe client.
+
+        Args:
+            nextcloud_host: Nextcloud base URL (e.g., https://cloud.example.com)
+            client_id: OAuth client ID for MCP server
+            client_secret: OAuth client secret
+        """
+        self.nextcloud_host = nextcloud_host.rstrip("/")
+        self.client_id = client_id
+        self.client_secret = client_secret
+        self._token_cache: Optional[dict] = None  # {access_token, expires_at}
+
+    async def get_access_token(self) -> str:
+        """
+        Get access token using OAuth client credentials flow.
+
+        Tokens are cached with 1-minute early refresh to avoid expiration.
+
+        Returns:
+            Access token string
+
+        Raises:
+            httpx.HTTPError: If token request fails
+        """
+        # Check cache
+        if self._token_cache and time.time() < self._token_cache["expires_at"]:
+            logger.debug("Using cached OAuth token for Astrolabe API")
+            return self._token_cache["access_token"]
+
+        # Discover token endpoint
+        discovery_url = f"{self.nextcloud_host}/.well-known/openid-configuration"
+
+        async with nextcloud_httpx_client() as client:
+            logger.debug(f"Discovering token endpoint from {discovery_url}")
+            discovery_resp = await client.get(discovery_url)
+            discovery_resp.raise_for_status()
+            token_endpoint = discovery_resp.json()["token_endpoint"]
+
+            logger.debug(f"Requesting client credentials token from {token_endpoint}")
+
+            # Request token using client credentials grant
+            token_resp = await client.post(
+                token_endpoint,
+                data={
+                    "grant_type": "client_credentials",
+                    "client_id": self.client_id,
+                    "client_secret": self.client_secret,
+                    "scope": "openid",  # Minimal scope
+                },
+            )
+            token_resp.raise_for_status()
+            data = token_resp.json()
+
+            # Cache with 1-minute early refresh
+            expires_in = data.get("expires_in", 3600)
+            self._token_cache = {
+                "access_token": data["access_token"],
+                "expires_at": time.time() + expires_in - 60,
+            }
+
+            logger.info(f"Obtained Astrolabe API token (expires in {expires_in}s)")
+            return data["access_token"]
+
+    async def get_user_app_password(self, user_id: str) -> Optional[str]:
+        """
+        Retrieve user's app password for background sync.
+
+        Args:
+            user_id: Nextcloud user ID
+
+        Returns:
+            App password string, or None if user hasn't provisioned
+
+        Raises:
+            httpx.HTTPError: If API request fails (except 404)
+        """
+        token = await self.get_access_token()
+        url = f"{self.nextcloud_host}/apps/astrolabe/api/v1/background-sync/credentials/{user_id}"
+
+        async with nextcloud_httpx_client() as client:
+            logger.debug(f"Retrieving app password for user: {user_id}")
+
+            response = await client.get(
+                url,
+                headers={"Authorization": f"Bearer {token}"},
+                timeout=10.0,
+            )
+
+            if response.status_code == 404:
+                logger.debug(f"No app password configured for user: {user_id}")
+                return None
+
+            response.raise_for_status()
+            data = response.json()
+
+            logger.info(
+                f"Retrieved app password for user: {user_id} (type: {data.get('credential_type')})"
+            )
+            return data.get("app_password")
+
+    async def get_background_sync_status(self, user_id: str) -> dict:
+        """
+        Get background sync status for a user.
+
+        Args:
+            user_id: Nextcloud user ID
+
+        Returns:
+            Dict with keys: has_access, credential_type, provisioned_at
+
+        Raises:
+            httpx.HTTPError: If API request fails
+        """
+        # For now, check if app password exists
+        # In the future, this could query a dedicated status endpoint
+        app_password = await self.get_user_app_password(user_id)
+
+        return {
+            "has_access": app_password is not None,
+            "credential_type": "app_password" if app_password else None,
+            "provisioned_at": None,  # TODO: Get from API if available
+        }
@@ -8,8 +8,10 @@ import hashlib
 import logging
 import os
 import secrets
+import time
 from base64 import urlsafe_b64encode
 from urllib.parse import urlencode
+from urllib.parse import urlparse as parse_url

 import httpx
 import jwt
@@ -21,9 +23,31 @@ from nextcloud_mcp_server.auth.userinfo_routes import (
    _query_idp_userinfo,
 )

+from ..http import nextcloud_httpx_client
+
 logger = logging.getLogger(__name__)


+def _should_use_secure_cookies() -> bool:
+    """Determine if cookies should have secure flag.
+
+    Checks COOKIE_SECURE env var first, then auto-detects from NEXTCLOUD_HOST.
+
+    Returns:
+        True if cookies should be secure (HTTPS), False otherwise
+    """
+    # Explicit configuration takes precedence
+    explicit = os.getenv("COOKIE_SECURE", "").lower()
+    if explicit == "true":
+        return True
+    if explicit == "false":
+        return False
+
+    # Auto-detect from NEXTCLOUD_HOST protocol
+    nextcloud_host = os.getenv("NEXTCLOUD_HOST", "")
+    return nextcloud_host.startswith("https://")
+
+
 async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
    """Browser OAuth login endpoint - redirects to IdP for authentication.

@@ -50,6 +74,10 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
    logger.info(f"oauth_login called - client_id: {oauth_config.get('client_id')}")
    logger.info(f"oauth_login called - oauth_client: {oauth_client is not None}")

+    # Get redirect URL from query params (default to /app)
+    next_url = request.query_params.get("next", "/app")
+    logger.info(f"oauth_login - next_url: {next_url}")
+
    # Generate state for CSRF protection
    state = secrets.token_urlsafe(32)

@@ -71,7 +99,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
    await storage.store_oauth_session(
        session_id=state,  # Use state as session ID
        client_id="browser-ui",
-        client_redirect_uri="/app",
+        client_redirect_uri=next_url,  # Store the redirect URL for after auth
        state=state,
        code_challenge=code_challenge,
        code_challenge_method="S256",
@@ -85,6 +113,11 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
        if not oauth_client.authorization_endpoint:
            await oauth_client.discover()

+        # Get Nextcloud resource URI for audience (background sync needs Nextcloud-scoped tokens)
+        nextcloud_resource_uri = oauth_config.get(
+            "nextcloud_resource_uri", oauth_config.get("nextcloud_host")
+        )
+
        idp_params = {
            "client_id": oauth_client.client_id,
            "redirect_uri": callback_uri,
@@ -94,6 +127,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
            "code_challenge": code_challenge,
            "code_challenge_method": "S256",
            "prompt": "consent",  # Ensure refresh token
+            "resource": nextcloud_resource_uri,  # Request tokens for Nextcloud API access
        }

        auth_url = f"{oauth_client.authorization_endpoint}?{urlencode(idp_params)}"
@@ -111,7 +145,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
            )

        # Fetch authorization endpoint
-        async with httpx.AsyncClient() as http_client:
+        async with nextcloud_httpx_client() as http_client:
            response = await http_client.get(discovery_url)
            response.raise_for_status()
            discovery = response.json()
@@ -120,8 +154,6 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
        # Replace internal Docker hostname with public URL
        public_issuer = os.getenv("NEXTCLOUD_PUBLIC_ISSUER_URL")
        if public_issuer:
-            from urllib.parse import urlparse as parse_url
-
            internal_parsed = parse_url(oauth_config["nextcloud_host"])
            auth_parsed = parse_url(authorization_endpoint)

@@ -131,6 +163,11 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
                    f"{public_parsed.scheme}://{public_parsed.netloc}{auth_parsed.path}"
                )

+        # Get Nextcloud resource URI for audience (background sync needs Nextcloud-scoped tokens)
+        nextcloud_resource_uri = oauth_config.get(
+            "nextcloud_resource_uri", oauth_config.get("nextcloud_host")
+        )
+
        idp_params = {
            "client_id": oauth_config["client_id"],
            "redirect_uri": callback_uri,
@@ -140,6 +177,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
            "code_challenge": code_challenge,
            "code_challenge_method": "S256",
            "prompt": "consent",  # Ensure refresh token
+            "resource": nextcloud_resource_uri,  # Request tokens for Nextcloud API access
        }

        # Debug: Log full parameters
@@ -214,12 +252,15 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
    oauth_client = oauth_ctx["oauth_client"]
    oauth_config = oauth_ctx["config"]

-    # Retrieve code_verifier from session storage (PKCE required for all modes)
+    # Retrieve code_verifier and redirect URL from session storage
    code_verifier = ""
+    next_url = "/app"  # Default redirect
    oauth_session = await storage.get_oauth_session(state)
    if oauth_session:
        # code_verifier was stored in mcp_authorization_code field
        code_verifier = oauth_session.get("mcp_authorization_code", "")
+        # next_url was stored in client_redirect_uri field
+        next_url = oauth_session.get("client_redirect_uri", "/app")
        # Clean up the temporary session
        # Note: We don't have delete_oauth_session method, but it will expire after TTL

@@ -246,7 +287,7 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
            if code_verifier:
                token_params["code_verifier"] = code_verifier

-            async with httpx.AsyncClient() as http_client:
+            async with nextcloud_httpx_client() as http_client:
                response = await http_client.post(
                    oauth_client.token_endpoint,
                    data=token_params,
@@ -256,7 +297,7 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
        else:
            # Integrated mode (Nextcloud OIDC)
            discovery_url = oauth_config.get("discovery_url")
-            async with httpx.AsyncClient() as http_client:
+            async with nextcloud_httpx_client() as http_client:
                response = await http_client.get(discovery_url)
                response.raise_for_status()
                discovery = response.json()
@@ -274,7 +315,7 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
            if code_verifier:
                token_params["code_verifier"] = code_verifier

-            async with httpx.AsyncClient() as http_client:
+            async with nextcloud_httpx_client() as http_client:
                response = await http_client.post(
                    token_endpoint,
                    data=token_params,
@@ -338,16 +379,33 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
        user_id = f"user-{secrets.token_hex(8)}"
        username = "unknown"

+    # Calculate refresh token expiration from token response
+    refresh_expires_in = token_data.get("refresh_expires_in")
+    refresh_expires_at = None
+    if refresh_expires_in:
+        refresh_expires_at = int(time.time()) + refresh_expires_in
+        logger.info(
+            f"Refresh token expires in {refresh_expires_in}s (at timestamp {refresh_expires_at})"
+        )
+
+    # Extract granted scopes
+    granted_scopes = (
+        token_data.get("scope", "").split() if token_data.get("scope") else None
+    )
+
    # Store refresh token (for background jobs ONLY)
    if refresh_token:
        logger.info(f"Storing refresh token for user_id: {user_id}")
        logger.info(f"  State parameter (provisioning_client_id): {state[:16]}...")
+        logger.info(f"  Granted scopes: {granted_scopes}")
+        logger.info(f"  Expires at: {refresh_expires_at}")
        await storage.store_refresh_token(
            user_id=user_id,
            refresh_token=refresh_token,
-            expires_at=None,
+            expires_at=refresh_expires_at,
            flow_type="browser",  # Browser-based login flow
            provisioning_client_id=state,  # Store state for unified session lookup
+            scopes=granted_scopes,
        )
        logger.info(f"✓ Refresh token stored successfully for user_id: {user_id}")
        logger.info(
@@ -383,13 +441,14 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
            # Continue anyway - profile cache is optional for browser UI

    # Create response and set session cookie
-    response = RedirectResponse("/app", status_code=302)
+    # Redirect to stored next_url (from OAuth session) or /app as default
+    response = RedirectResponse(next_url, status_code=302)
    response.set_cookie(
        key="mcp_session",
        value=user_id,
        max_age=86400 * 30,  # 30 days
        httponly=True,
-        secure=False,  # Set to True in production with HTTPS
+        secure=_should_use_secure_cookies(),
        samesite="lax",
    )

@@ -10,6 +10,8 @@ import httpx

 from nextcloud_mcp_server.auth.storage import RefreshTokenStorage

+from ..http import nextcloud_httpx_client
+
 logger = logging.getLogger(__name__)


@@ -81,6 +83,7 @@ async def register_client(
    scopes: str = "openid profile email",
    token_type: str | None = "Bearer",
    resource_url: str | None = None,
+    max_retries: int = 3,
 ) -> ClientInfo:
    """
    Register a new OAuth client using RFC 7591 Dynamic Client Registration.
@@ -96,6 +99,7 @@ async def register_client(
        token_type: Type of access tokens (default: "Bearer", supports "JWT" for Nextcloud).
                    Set to None to omit this field (required for Keycloak and other standard providers).
        resource_url: OAuth 2.0 Protected Resource URL (RFC 9728) - used for token introspection authorization
+        max_retries: Maximum number of retries for 429 responses (default: 3)

    Returns:
        ClientInfo with registration details
@@ -132,58 +136,92 @@ async def register_client(
    logger.info(f"Registering OAuth client with Nextcloud: {client_name}")
    logger.debug(f"Registration endpoint: {registration_endpoint}")

-    async with httpx.AsyncClient(timeout=30.0) as client:
-        try:
-            response = await client.post(
-                registration_endpoint,
-                json=client_metadata,
-                headers={"Content-Type": "application/json"},
-            )
-            response.raise_for_status()
-
-            client_info = response.json()
-            logger.info(
-                f"Successfully registered client: {client_info.get('client_id')}"
-            )
-            expires_at = dt.datetime.fromtimestamp(
-                client_info.get("client_secret_expires_at")
-            )
-            logger.info(
-                f"Client expires at: {expires_at} "
-                f"(in {client_info.get('client_secret_expires_at', 0) - int(time.time())} seconds)"
-            )
-
-            # Log if RFC 7592 fields are present
-            has_reg_token = "registration_access_token" in client_info
-            has_reg_uri = "registration_client_uri" in client_info
-            if has_reg_token and has_reg_uri:
-                logger.info(
-                    "RFC 7592 management fields received - client deletion will be supported"
+    async with nextcloud_httpx_client(timeout=30.0) as client:
+        for attempt in range(max_retries):
+            try:
+                response = await client.post(
+                    registration_endpoint,
+                    json=client_metadata,
+                    headers={"Content-Type": "application/json"},
                )
-            else:
-                logger.warning("RFC 7592 fields missing - client deletion may not work")

-            return ClientInfo(
-                client_id=client_info["client_id"],
-                client_secret=client_info["client_secret"],
-                client_id_issued_at=client_info.get(
-                    "client_id_issued_at", int(time.time())
-                ),
-                client_secret_expires_at=client_info.get(
-                    "client_secret_expires_at", int(time.time()) + 3600
-                ),
-                redirect_uris=client_info.get("redirect_uris", redirect_uris),
-                registration_access_token=client_info.get("registration_access_token"),
-                registration_client_uri=client_info.get("registration_client_uri"),
-            )
+                if response.status_code == 429:
+                    # Rate limited - retry with exponential backoff
+                    if attempt < max_retries - 1:
+                        retry_after = int(response.headers.get("Retry-After", 2))
+                        wait_time = min(retry_after, 2**attempt)
+                        logger.warning(
+                            f"Rate limited (429) registering client, "
+                            f"retrying in {wait_time}s (attempt {attempt + 1}/{max_retries})"
+                        )
+                        await anyio.sleep(wait_time)
+                        continue
+                    else:
+                        logger.error(
+                            f"Failed to register client after {max_retries} attempts: Rate limited (429)"
+                        )
+                        response.raise_for_status()

-        except httpx.HTTPStatusError as e:
-            logger.error(f"Failed to register client: HTTP {e.response.status_code}")
-            logger.error(f"Response: {e.response.text}")
-            raise
-        except KeyError as e:
-            logger.error(f"Invalid response from registration endpoint: missing {e}")
-            raise ValueError(f"Invalid registration response: missing {e}")
+                response.raise_for_status()
+
+                client_info = response.json()
+                logger.info(
+                    f"Successfully registered client: {client_info.get('client_id')}"
+                )
+                expires_at = dt.datetime.fromtimestamp(
+                    client_info.get("client_secret_expires_at")
+                )
+                logger.info(
+                    f"Client expires at: {expires_at} "
+                    f"(in {client_info.get('client_secret_expires_at', 0) - int(time.time())} seconds)"
+                )
+
+                # Log if RFC 7592 fields are present
+                has_reg_token = "registration_access_token" in client_info
+                has_reg_uri = "registration_client_uri" in client_info
+                if has_reg_token and has_reg_uri:
+                    logger.info(
+                        "RFC 7592 management fields received - client deletion will be supported"
+                    )
+                else:
+                    logger.warning(
+                        "RFC 7592 fields missing - client deletion may not work"
+                    )
+
+                return ClientInfo(
+                    client_id=client_info["client_id"],
+                    client_secret=client_info["client_secret"],
+                    client_id_issued_at=client_info.get(
+                        "client_id_issued_at", int(time.time())
+                    ),
+                    client_secret_expires_at=client_info.get(
+                        "client_secret_expires_at", int(time.time()) + 3600
+                    ),
+                    redirect_uris=client_info.get("redirect_uris", redirect_uris),
+                    registration_access_token=client_info.get(
+                        "registration_access_token"
+                    ),
+                    registration_client_uri=client_info.get("registration_client_uri"),
+                )
+
+            except httpx.HTTPStatusError as e:
+                logger.error(
+                    f"Failed to register client: HTTP {e.response.status_code}"
+                )
+                logger.error(f"Response: {e.response.text}")
+                raise
+            except KeyError as e:
+                logger.error(
+                    f"Invalid response from registration endpoint: missing {e}"
+                )
+                raise ValueError(f"Invalid registration response: missing {e}")
+
+    # Should not reach here, but raise if we do
+    raise httpx.HTTPStatusError(
+        "Registration failed after retries",
+        request=httpx.Request("POST", registration_endpoint),
+        response=httpx.Response(429),
+    )


 async def delete_client(
@@ -229,7 +267,7 @@ async def delete_client(
    logger.info(f"Deleting OAuth client: {client_id[:16]}...")
    logger.debug(f"Deletion endpoint: {deletion_endpoint}")

-    async with httpx.AsyncClient(timeout=30.0) as http_client:
+    async with nextcloud_httpx_client(timeout=30.0) as http_client:
        for attempt in range(max_retries):
            try:
                # Prefer RFC 7592 Bearer token authentication
@@ -10,6 +10,7 @@ import logging
 import os
 from dataclasses import dataclass
 from typing import Dict, List, Optional
+from urllib.parse import urlparse

 logger = logging.getLogger(__name__)

@@ -141,8 +142,8 @@ class ClientRegistry:
            if not self._validate_redirect_uri(client, redirect_uri):
                return False, f"Invalid redirect_uri for client {client_id}"

-        # Validate scopes if provided
-        if scopes:
+        # Validate scopes if provided (wildcard "*" allows all scopes)
+        if scopes and "*" not in client.allowed_scopes:
            invalid_scopes = set(scopes) - set(client.allowed_scopes)
            if invalid_scopes:
                return False, f"Invalid scopes for client {client_id}: {invalid_scopes}"
@@ -161,8 +162,6 @@ class ClientRegistry:
            True if valid, False otherwise
        """
        # Parse the redirect URI
-        from urllib.parse import urlparse
-
        parsed = urlparse(redirect_uri)

        # Check against registered patterns
@@ -203,6 +202,29 @@ class ClientRegistry:
        # In production, would persist to database
        return True

+    def register_proxy_client(
+        self, client_id: str, redirect_uris: list[str], name: str = ""
+    ) -> None:
+        """Register a client discovered via DCR proxy.
+
+        When the MCP server acts as an OAuth AS proxy, clients register via
+        the proxy's /oauth/register endpoint. This method stores the client
+        locally so /oauth/authorize can validate it.
+
+        Args:
+            client_id: Client identifier from Nextcloud DCR response
+            redirect_uris: Allowed redirect URIs
+            name: Optional human-readable name
+        """
+        self._clients[client_id] = MCPClientInfo(
+            client_id=client_id,
+            name=name or f"DCR-{client_id[:8]}",
+            redirect_uris=redirect_uris or ["http://localhost:*", "http://127.0.0.1:*"],
+            allowed_scopes=["*"],  # Nextcloud enforces actual scopes
+            is_public=True,
+        )
+        logger.info(f"Registered proxy client: {client_id}")
+
    def get_client(self, client_id: str) -> Optional[MCPClientInfo]:
        """
        Get client information.
@@ -12,6 +12,10 @@ from mcp.server.fastmcp import Context

 from ..client import NextcloudClient
 from ..config import get_settings
+from ..observability.metrics import (
+    oauth_token_cache_hits_total,
+    oauth_token_exchange_total,
+)
 from .token_exchange import exchange_token_for_audience

 logger = logging.getLogger(__name__)
@@ -138,6 +142,7 @@ async def get_session_client_from_context(
                logger.debug(
                    f"Using cached exchanged token (expires in {expiry - time.time():.1f}s)"
                )
+                oauth_token_cache_hits_total.labels(hit="true").inc()
                return NextcloudClient.from_token(
                    base_url=base_url, token=cached_token, username=username
                )
@@ -145,17 +150,24 @@ async def get_session_client_from_context(
                logger.debug("Cached token expired, removing from cache")
                del _exchange_cache[cache_key]

+        oauth_token_cache_hits_total.labels(hit="false").inc()
+
        # Perform RFC 8693 token exchange
        logger.info(f"Exchanging MCP token for Nextcloud API token (user: {username})")

-        # Exchange for Nextcloud resource URI audience
-        exchanged_token, expires_in = await exchange_token_for_audience(
-            subject_token=mcp_token,
-            requested_audience=settings.nextcloud_resource_uri or "nextcloud",
-            requested_scopes=None,  # Nextcloud doesn't support scopes
-        )
+        try:
+            # Exchange for Nextcloud resource URI audience
+            exchanged_token, expires_in = await exchange_token_for_audience(
+                subject_token=mcp_token,
+                requested_audience=settings.nextcloud_resource_uri or "nextcloud",
+                requested_scopes=None,  # Nextcloud doesn't support scopes
+            )
+            oauth_token_exchange_total.labels(status="success").inc()

-        logger.info(f"Token exchange successful. Token expires in {expires_in}s")
+            logger.info(f"Token exchange successful. Token expires in {expires_in}s")
+        except Exception:
+            oauth_token_exchange_total.labels(status="error").inc()
+            raise

        # Cache the exchanged token
        # Use the minimum of exchange TTL and configured cache TTL
@@ -0,0 +1,88 @@
+"""MCP elicitation helpers for Login Flow v2.
+
+Provides a unified way to present login URLs to users, using MCP elicitation
+when the client supports it, or falling back to returning the URL in a message.
+"""
+
+import logging
+
+from mcp.server.fastmcp import Context
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+
+class LoginFlowConfirmation(BaseModel):
+    """Schema for Login Flow v2 confirmation elicitation."""
+
+    acknowledged: bool = Field(
+        default=False,
+        description="Check this box after completing login at the provided URL",
+    )
+
+
+async def present_login_url(
+    ctx: Context,
+    login_url: str,
+    message: str | None = None,
+) -> str:
+    """Present a login URL to the user via MCP elicitation or message.
+
+    Tries MCP elicitation first (ctx.elicit) for interactive clients.
+    Falls back to returning the URL as a plain message.
+
+    Args:
+        ctx: MCP context
+        login_url: URL the user should open in their browser
+        message: Optional custom message (defaults to standard Login Flow prompt)
+
+    Returns:
+        "accepted" if user acknowledged via elicitation,
+        "declined" if user declined,
+        "message_only" if elicitation not supported (URL returned in message)
+    """
+    if message is None:
+        message = (
+            f"Please log in to Nextcloud to grant access:\n\n"
+            f"{login_url}\n\n"
+            f"Open this URL in your browser, log in, and grant the requested permissions. "
+            f"Then check the box below and click OK."
+        )
+
+    if not hasattr(ctx, "elicit"):
+        logger.debug(
+            "Elicitation not available (no elicit method), returning URL in message"
+        )
+        return "message_only"
+
+    try:
+        result = await ctx.elicit(
+            message=message,
+            schema=LoginFlowConfirmation,
+        )
+
+        if result.action == "accept":
+            if hasattr(result, "data") and not result.data.acknowledged:  # type: ignore[union-attr]
+                logger.warning(
+                    "User accepted login flow without checking the acknowledged box — "
+                    "login completion will be verified via polling"
+                )
+            logger.info("User acknowledged login flow completion")
+            return "accepted"
+        elif result.action == "decline":
+            logger.info("User declined login flow")
+            return "declined"
+        else:
+            logger.info("User cancelled login flow")
+            return "cancelled"
+
+    except NotImplementedError:
+        # Elicitation not supported by this client/SDK - fall back to message
+        logger.debug("Elicitation not available, returning URL in message")
+        return "message_only"
+    except Exception as e:
+        logger.warning(
+            f"Elicitation failed unexpectedly ({type(e).__name__}: {e}), "
+            "falling back to message"
+        )
+        return "message_only"
@@ -8,6 +8,7 @@ Handles OAuth flows with Keycloak as the identity provider, including:
 - Integration with RefreshTokenStorage
 """

+import base64
 import hashlib
 import logging
 import os
@@ -17,6 +18,8 @@ from urllib.parse import urlencode, urlparse

 import httpx

+from ..http import nextcloud_httpx_client
+
 logger = logging.getLogger(__name__)


@@ -106,7 +109,7 @@ class KeycloakOAuthClient:
    async def _get_http_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client"""
        if self._http_client is None:
-            self._http_client = httpx.AsyncClient(timeout=30.0)
+            self._http_client = nextcloud_httpx_client(timeout=30.0)
        return self._http_client

    async def close(self) -> None:
@@ -155,7 +158,6 @@ class KeycloakOAuthClient:
        Returns:
            Tuple of (code_verifier, code_challenge)
        """
-        import base64

        # Generate code verifier (43-128 characters)
        code_verifier = secrets.token_urlsafe(32)
--- a/Show More
+++ b/Show More