bump: version 0.35.0 → 0.36.0

Merge pull request #302 from cbcoutinho/feature/viz
feat: Vector visualization enhancements and search optimizations
2025-11-15 23:32:55 +00:00 · 2025-11-16 00:32:31 +01:00 · 2025-11-15 06:48:58 +01:00 · 2025-11-15 06:21:06 +01:00 · 2025-11-15 05:39:07 +01:00 · 2025-11-15 05:19:35 +01:00
55 changed files with 7925 additions and 1428 deletions
@@ -20,7 +20,7 @@ jobs:
      - name: Checkout
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
      - name: Install uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
      - name: Install Python 3.11
        run: uv python install 3.11
      - name: Build
@@ -11,7 +11,7 @@ jobs:
    steps:
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
      - name: Check format
        run: |
          uv run --frozen ruff format --diff
@@ -56,7 +56,7 @@ jobs:
          up-flags: "--build"

      - name: Install the latest version of uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3

      - name: Install Playwright dependencies
        run: |
@@ -85,4 +85,4 @@ jobs:
          NEXTCLOUD_USERNAME: "admin"
          NEXTCLOUD_PASSWORD: "admin"
        run: |
-          uv run pytest -v --log-cli-level=WARN --ignore=tests/manual
+          uv run pytest -v --log-cli-level=WARN -m smoke
@@ -5,6 +5,9 @@ __pycache__/
 .env.local
 .env.*.local

+# Git
+worktrees/
+
 docker-compose.override.yml

 # Generated by pytest used to login users
@@ -1,3 +1,97 @@
+## v0.36.0 (2025-11-15)
+
+### BREAKING CHANGE
+
+- Search algorithms now require Qdrant to be populated.
+Vector sync must be enabled and documents indexed for search to work.
+
+### Feat
+
+- Normalize hybrid search RRF scores to 0-1 range
+- Enhance vector visualization UI and parallelize search verification
+- Add Vector Viz tab to app home page
+- Add vector visualization pane with multi-select document types
+- Implement custom PCA to remove sklearn dependency
+- Add multi-document Protocol with cross-app search support
+- Update nc_semantic_search tool with algorithm selection
+- Implement unified search algorithm module
+
+### Fix
+
+- Reorder tabs and fix viz pane session access
+
+### Refactor
+
+- Optimize Nextcloud access verification with centralized filtering
+- Make all search algorithms query Qdrant payload, not Nextcloud
+
+### Perf
+
+- Exclude vector-sync status polling from distributed tracing
+
+## v0.35.0 (2025-11-15)
+
+### Feat
+
+- Enable SSE transport for mcp service and update test fixtures
+
+## v0.34.2 (2025-11-13)
+
+### Fix
+
+- Use NEXTCLOUD_OIDC_CLIENT_ID/SECRET env vars consistently
+
+## v0.34.1 (2025-11-13)
+
+### Fix
+
+- return all notes when search query is empty
+
+## v0.34.0 (2025-11-13)
+
+### Feat
+
+- Complete Phase 5 - Instrument all 93 MCP tools
+- Add instrumentation decorator and apply to notes tools (Phase 5)
+- Add OAuth token and database metrics (Phases 3-4)
+- Add metrics instrumentation for queue, health, and database operations
+
+## v0.33.1 (2025-11-13)
+
+### Fix
+
+- Move grafana_folder from labels to annotations
+
+## v0.33.0 (2025-11-13)
+
+### Feat
+
+- Add Grafana dashboard and vector sync metric instrumentation
+
+## v0.32.1 (2025-11-12)
+
+### Fix
+
+- add dynamic dimension detection for Ollama embedding models
+
+## v0.32.0 (2025-11-11)
+
+### Feat
+
+- **ollama**: Pull model on startup if not available in ollama
+- add dynamic vector sync status updates with htmx polling
+- add webhook management UI and BeforeNodeDeletedEvent support
+- validate Nextcloud webhook schemas and document findings
+
+### Fix
+
+- improve webapp tab UI with CSS Grid and viewport-filling container
+
+### Refactor
+
+- move webapp from /user/page to /app
+- consolidate database storage for webhooks and OAuth tokens
+
 ## v0.31.1 (2025-11-10)

 ### Refactor
@@ -1,4 +1,4 @@
-FROM ghcr.io/astral-sh/uv:0.9.8-python3.11-alpine@sha256:6c842c49ad032f46b62f32a7e7779f45f12671a8e0d82ea24c766ab62d58b396
+FROM ghcr.io/astral-sh/uv:0.9.9-python3.11-alpine@sha256:0faa7934fac1db7f5056f159c1224d144bab864fd2677a4066d25a686ae32edd

 # Install dependencies
 # 1. git (required for caldav dependency from git)
@@ -2,8 +2,8 @@ apiVersion: v2
 name: nextcloud-mcp-server
 description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
 type: application
-version: 0.31.1
-appVersion: "0.31.1"
+version: 0.36.0
+appVersion: "0.36.0"
 keywords:
  - nextcloud
  - mcp
@@ -21,6 +21,10 @@ home: https://github.com/cbcoutinho/nextcloud-mcp-server
 sources:
  - https://github.com/cbcoutinho/nextcloud-mcp-server
 icon: https://raw.githubusercontent.com/nextcloud/server/master/core/img/logo/logo.svg
+annotations:
+  # Grafana dashboard support
+  grafana_dashboard: "true"
+  grafana_dashboard_folder: "Nextcloud MCP"
 dependencies:
  - name: qdrant
    version: "1.15.5"
@@ -280,6 +280,72 @@ Use OpenAI or any OpenAI-compatible API instead of Ollama.
 | `openai.secretKey` | Key in secret containing API key | `api-key` |
 | `openai.baseUrl` | Custom API endpoint (optional) | `""` |

+#### Observability & Monitoring
+
+The chart provides comprehensive observability features, including Prometheus metrics, OpenTelemetry tracing, and Grafana dashboards.
+
+**Metrics Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.metrics.enabled` | Enable Prometheus metrics | `true` |
+| `observability.metrics.port` | Metrics port | `9090` |
+| `observability.metrics.path` | Metrics endpoint path | `/metrics` |
+
+**Tracing Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.tracing.enabled` | Enable OpenTelemetry tracing | `false` |
+| `observability.tracing.endpoint` | OTLP collector endpoint | `""` |
+| `observability.tracing.serviceName` | Service name in traces | `nextcloud-mcp-server` |
+| `observability.tracing.samplingRate` | Trace sampling rate (0.0-1.0) | `1.0` |
+
+**Logging Configuration:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `observability.logging.format` | Log format (json or text) | `json` |
+| `observability.logging.level` | Log level | `INFO` |
+| `observability.logging.includeTraceContext` | Include trace IDs in logs | `true` |
+
+**ServiceMonitor (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `serviceMonitor.enabled` | Create ServiceMonitor resource | `false` |
+| `serviceMonitor.interval` | Scrape interval | `30s` |
+| `serviceMonitor.scrapeTimeout` | Scrape timeout | `10s` |
+| `serviceMonitor.labels` | Additional labels for ServiceMonitor | `{}` |
+
+**PrometheusRule (Prometheus Operator):**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `prometheusRule.enabled` | Create PrometheusRule with alert rules | `false` |
+| `prometheusRule.labels` | Additional labels for PrometheusRule | `{}` |
+
+**Grafana Dashboards:**
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `dashboards.enabled` | Enable automatic dashboard provisioning | `false` |
+| `dashboards.grafanaFolder` | Grafana folder name for dashboards | `Nextcloud MCP` |
+| `dashboards.labels` | Additional labels for dashboard ConfigMap | `{}` |
+| `dashboards.annotations` | Additional annotations for dashboard ConfigMap | `{}` |
+
+When `dashboards.enabled` is `true`, a ConfigMap with the Grafana dashboard is created with the `grafana_dashboard: "1"` label. This enables automatic discovery by Grafana sidecar containers (commonly used with kube-prometheus-stack).
+
+The dashboard provides comprehensive monitoring including:
+- HTTP request metrics (RED pattern: Rate, Errors, Duration)
+- MCP tool performance and errors
+- Nextcloud API performance by app (notes, calendar, contacts, etc.)
+- OAuth token operations and cache hit rates
+- External dependency health (Nextcloud, Qdrant, Keycloak, Unstructured API)
+- Vector sync processing pipeline (when enabled)
+
+For manual import or more details, see `charts/nextcloud-mcp-server/dashboards/README.md`.
+
 ## Examples

 ### Example 1: Basic Auth with Ingress
@@ -6,14 +6,57 @@ This directory contains example Grafana dashboards for monitoring the Nextcloud

 ### nextcloud-mcp-server.json

-Comprehensive dashboard with the following panels:
+All-in-one Operations Dashboard with comprehensive monitoring across all system components.

- **Request Rate**: HTTP requests per second by method and endpoint
- **Error Rate**: Percentage of 5xx errors
- **Request Latency**: P50 and P95 latency by endpoint
- **Top MCP Tools**: Most frequently called tools
- **Nextcloud API Latency**: API call latency by app (notes, calendar, etc.)
- **Vector Sync Queue**: Queue size for background document processing
+#### Overview Row
+High-level metrics for quick health assessment:
+- **Request Rate** (stat): Total requests per second
+- **Error Rate** (stat): Percentage of 5xx errors with color thresholds
+- **P95 Latency** (stat): 95th percentile request latency
+- **Active Requests** (stat): Current in-flight requests
+
+#### HTTP Metrics (RED Pattern)
+Core request/error/duration metrics:
+- **Request Rate by Endpoint** (timeseries): RPS breakdown by endpoint
+- **Error Rate by Status Code** (timeseries): Error rates for 4xx/5xx codes
+- **Latency Percentiles** (timeseries): P50, P95, P99 latency trends
+- **Status Code Distribution** (piechart): Percentage breakdown of all status codes
+
+#### MCP Tools Row
+MCP-specific tool performance:
+- **Top Tools by Call Volume** (bargauge): Top 10 most-called tools
+- **Tool Error Rate** (timeseries): Error rates per tool
+- **Tool Execution Duration** (timeseries): P95 latency by tool
+
+#### Nextcloud API Row
+Backend API performance metrics:
+- **API Calls by App** (timeseries): Request rate per Nextcloud app (notes, calendar, contacts, etc.)
+- **API Latency by App** (timeseries): P95 latency per app
+- **API Retries by Reason** (timeseries): Retry patterns (429, timeout, connection errors)
+- **API Error Rate** (stat): Overall API error percentage
+
+#### OAuth & Authentication Row
+OAuth token operations and caching:
+- **Token Validations** (timeseries): Success/failure rates for token validation
+- **Token Exchange Operations** (timeseries): RFC 8693 token exchange operations
+- **Token Cache Hit Rate** (stat): Percentage of cache hits (color-coded: red<50%, yellow<80%, green≥80%)
+- **Refresh Token Operations** (timeseries): Refresh token storage operations by type
+
+#### Dependencies & Health Row
+External dependency status monitoring:
+- **Nextcloud Health** (stat): UP/DOWN status with color coding
+- **Qdrant Health** (stat): Vector database health status
+- **Keycloak Health** (stat): Identity provider health status
+- **Unstructured API Health** (stat): Document processing API status
+- **Health Check Duration** (timeseries): Health check latency by dependency
+- **Database Operation Latency** (timeseries): P95 latency for DB operations (SQLite, Qdrant)
+
+#### Vector Sync Row (when enabled)
+Document processing pipeline metrics:
+- **Documents Processed Rate** (timeseries): Processing throughput by status (success/failure)
+- **Processing Queue Depth** (gauge): Current queue size with thresholds (yellow>50, red>100)
+- **Qdrant Operations** (timeseries): Vector database operations by type
+- **Document Processing Duration** (timeseries): P95 processing latency

 ## Importing to Grafana

@@ -25,49 +68,77 @@ Comprehensive dashboard with the following panels:
 4. Select your Prometheus data source
 5. Click "Import"

-### Automated Import (Kubernetes)
+### Automated Import (Helm Chart)

-If using the Grafana Operator or kube-prometheus-stack, you can create a ConfigMap:
+The Helm chart now supports automatic dashboard provisioning via the Grafana sidecar pattern.
+
+#### Option 1: Using Helm Chart (Recommended)
+
+Enable dashboard provisioning in your Helm values:
+
+```yaml
+# values.yaml for nextcloud-mcp-server chart
+dashboards:
+  enabled: true
+  grafanaFolder: "Nextcloud MCP"  # Folder name in Grafana
+  labels: {}  # Additional labels if needed
+```
+
+Then deploy or upgrade:

 ```bash
-kubectl create configmap nextcloud-mcp-dashboards \
+helm upgrade --install nextcloud-mcp nextcloud-mcp-server \
+  --set dashboards.enabled=true
+```
+
+The dashboard will be automatically imported by Grafana if the sidecar is configured
+to watch for ConfigMaps with label `grafana_dashboard: "1"`.
+
+#### Option 2: Using kube-prometheus-stack
+
+If using kube-prometheus-stack with Grafana sidecar enabled, the dashboard will be
+automatically discovered and imported. Ensure your Grafana deployment has:
+
+```yaml
+# kube-prometheus-stack values
+grafana:
+  sidecar:
+    dashboards:
+      enabled: true
+      label: grafana_dashboard
+      folder: /tmp/dashboards
+      provider:
+        foldersFromFilesStructure: true
+```
+
+#### Option 3: Manual ConfigMap Creation
+
+For other Grafana setups, create a ConfigMap manually:
+
+```bash
+kubectl create configmap nextcloud-mcp-dashboard \
  --from-file=nextcloud-mcp-server.json \
  -n monitoring

-# Add label for Grafana sidecar to discover
-kubectl label configmap nextcloud-mcp-dashboards \
+# Add sidecar discovery label
+kubectl label configmap nextcloud-mcp-dashboard \
  grafana_dashboard=1 \
  -n monitoring
-```

-Or add to your Helm values:
-
-```yaml
-# values.yaml for kube-prometheus-stack
-grafana:
-  dashboardProviders:
-    dashboardproviders.yaml:
-      apiVersion: 1
-      providers:
-        - name: 'nextcloud-mcp'
-          orgId: 1
-          folder: 'Nextcloud MCP'
-          type: file
-          disableDeletion: false
-          editable: true
-          options:
-            path: /var/lib/grafana/dashboards/nextcloud-mcp
-
-  dashboardsConfigMaps:
-    nextcloud-mcp: nextcloud-mcp-dashboards
+# Add folder annotation (annotations support spaces, unlike labels)
+kubectl annotate configmap nextcloud-mcp-dashboard \
+  grafana_folder="Nextcloud MCP" \
+  -n monitoring
 ```

 ## Dashboard Variables

-The dashboard includes two variables:
+The dashboard includes four template variables for dynamic filtering:

- **Data Source**: Select your Prometheus data source
- **Namespace**: Filter metrics by Kubernetes namespace
+- **datasource**: Select your Prometheus data source
+- **namespace**: Filter metrics by Kubernetes namespace (supports "All")
+- **pod**: Filter by specific pod(s) - multi-select enabled (supports "All")
+- **interval**: Query interval for rate calculations (1m, 5m, 10m, 30m, 1h - default: 5m)

 ## Customization

@@ -96,6 +96,30 @@ Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentic
   kubectl --namespace {{ .Release.Namespace }} exec -it deploy/{{ include "nextcloud-mcp-server.fullname" . }} -- curl -s http://localhost:{{ include "nextcloud-mcp-server.port" . }}/user/page | grep "Vector Sync"
 {{- end }}

+{{- if .Values.dashboards.enabled }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Enabled
+   - ConfigMap: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+   - Grafana Folder: {{ .Values.dashboards.grafanaFolder }}
+
+   The dashboard will be automatically imported by Grafana if the sidecar is configured
+   to watch for ConfigMaps with label "grafana_dashboard: 1".
+
+   To manually import the dashboard:
+   kubectl --namespace {{ .Release.Namespace }} get configmap {{ include "nextcloud-mcp-server.fullname" . }}-dashboard -o jsonpath='{.data.nextcloud-mcp-server\.json}' | jq . > dashboard.json
+
+   Then import dashboard.json via Grafana UI (Dashboards → Import).
+{{- else }}
+
+6. Grafana Dashboards:
+   - Dashboard provisioning: Disabled
+   - To enable automatic dashboard provisioning, set: dashboards.enabled=true
+
+   Manual import option:
+   The dashboard JSON is available in the chart at charts/nextcloud-mcp-server/dashboards/nextcloud-mcp-server.json
+{{- end }}
+
 For more information and documentation:
 - GitHub: https://github.com/cbcoutinho/nextcloud-mcp-server
 - Documentation: https://github.com/cbcoutinho/nextcloud-mcp-server#readme
@@ -0,0 +1,25 @@
+{{- if .Values.dashboards.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "nextcloud-mcp-server.fullname" . }}-dashboard
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
+    {{- with .Values.dashboards.labels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana sidecar discovery label
+    grafana_dashboard: "1"
+  annotations:
+    {{- with .Values.dashboards.annotations }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+    # Grafana folder name (annotations support spaces, unlike labels)
+    {{- if .Values.dashboards.grafanaFolder }}
+    grafana_folder: {{ .Values.dashboards.grafanaFolder | quote }}
+    {{- end }}
+data:
+  nextcloud-mcp-server.json: |-
+{{ .Files.Get "dashboards/nextcloud-mcp-server.json" | indent 4 }}
+{{- end }}
@@ -205,6 +205,20 @@ prometheusRule:
  # Additional labels for PrometheusRule (e.g., for Prometheus selector)
  # Example: { prometheus: kube-prometheus }

+# Grafana dashboards (requires Grafana with sidecar enabled)
+dashboards:
+  # Enable automatic dashboard provisioning via ConfigMap
+  enabled: false
+  # Grafana folder name where dashboards will be imported
+  # The grafana-sidecar looks for ConfigMaps with label "grafana_dashboard: 1"
+  # and reads the folder name from annotation "grafana_folder" (supports spaces)
+  grafanaFolder: "Nextcloud MCP"
+  # Additional labels for dashboard ConfigMap
+  # These will be added alongside the required "grafana_dashboard: 1" label
+  labels: {}
+  # Additional annotations for dashboard ConfigMap
+  annotations: {}
+
 service:
  type: ClusterIP
  port: 8000
@@ -3,7 +3,7 @@ services:
  # https://hub.docker.com/_/mariadb
  db:
    # Note: Check the recommended version here: https://docs.nextcloud.com/server/latest/admin_manual/installation/system_requirements.html#server
-    image: docker.io/library/mariadb:lts@sha256:ae6119716edac6998ae85508431b3d2e666530ddf4e94c61a10710caec9b0f71
+    image: docker.io/library/mariadb:lts@sha256:6b848cb24fbbd87429917f6c4422ac53c343e85692eb0fef86553e99e4f422f3
    restart: always
    command: --transaction-isolation=READ-COMMITTED
    volumes:
@@ -34,7 +34,7 @@ services:
      - ./app-hooks:/docker-entrypoint-hooks.d:ro
      # Mount OIDC development directory outside /var/www/html to avoid rsync conflicts
      # The post-installation hook will register /opt/apps as an additional app directory
-      - ./third_party:/opt/apps:ro
+      #- ./third_party:/opt/apps:ro
    environment:
      - NEXTCLOUD_TRUSTED_DOMAINS=app
      - NEXTCLOUD_ADMIN_USER=admin
@@ -69,7 +69,6 @@ services:

  mcp:
    build: .
-    command: ["--transport", "streamable-http"]
    restart: always
    depends_on:
      app:
@@ -82,6 +81,7 @@ services:
      - NEXTCLOUD_HOST=http://app:80
      - NEXTCLOUD_USERNAME=admin
      - NEXTCLOUD_PASSWORD=admin
+      - NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080

      # Vector sync configuration (ADR-007)
      - VECTOR_SYNC_ENABLED=true
@@ -156,7 +156,7 @@ services:
      - oauth-tokens:/app/data

  keycloak:
-    image: quay.io/keycloak/keycloak:26.4.4@sha256:c6459d5fae1b759f5d667ebdc6237ab3121379c3494e213898569014ede1846d
+    image: quay.io/keycloak/keycloak:26.4.5@sha256:653852bfdea2be6e958b9e90a976eff1c6de34edd55f2f679bdc48ef16bc528e
    command:
      - "start-dev"
      - "--import-realm"
@@ -0,0 +1,895 @@
+# ADR-011: Improving Semantic Search Quality Through Better Chunking and Embeddings
+
+**Status**: Proposed
+**Date**: 2025-11-12
+**Authors**: Development Team
+**Related**: ADR-003 (Vector Database Architecture), ADR-008 (MCP Sampling for RAG)
+
+## Context
+
+The semantic search implementation provides document retrieval across Nextcloud apps using vector embeddings. Production usage has revealed that **the system frequently misses relevant documents** (recall problem).
+
+Root cause analysis identifies two fundamental issues:
+
+### 1. Poor Chunking Strategy
+
+**Current Implementation** (`nextcloud_mcp_server/vector/document_chunker.py:36`):
+```python
+words = content.split()  # Naive whitespace splitting
+chunk_size = 512  # words
+overlap = 50  # words
+chunks = [words[i:i+chunk_size] for i in range(0, len(words), chunk_size-overlap)]
+```
+
+**Problems**:
+- **Breaks semantic boundaries**: Splits mid-sentence, mid-paragraph, mid-thought
+- **Loses context**: "The meeting discussed budget. We decided to..." becomes two disconnected chunks
+- **Poor retrieval**: Relevant content split across chunks with low individual relevance scores
+- **No structure awareness**: Ignores markdown headers, lists, code blocks
+
+**Evidence**:
+- Documents with relevant content in middle sections score poorly (content split across 3+ chunks)
+- Multi-sentence concepts (spanning 60-100 words) are fragmented
+- Search for "budget planning process" misses documents where these words appear in adjacent sentences but different chunks
+
+### 2. Suboptimal Embedding Model
+
+**Current Implementation** (`nextcloud_mcp_server/embedding/ollama_provider.py:33`):
+```python
+_model = "nomic-embed-text"  # 768 dimensions
+_dimension = 768  # Hardcoded
+```
+
+**Problems**:
+- **Model selection**: `nomic-embed-text` is general-purpose, not optimized for our use case
+- **No benchmarking**: Selected without comparative evaluation
+- **Dimensionality**: 768-dim may be insufficient for nuanced semantic distinctions
+- **No domain adaptation**: Model not tuned for Nextcloud content (notes, calendar, deck cards)
+
+**Evidence**:
+- Synonymous queries return different results ("meeting notes" vs. "discussion summary")
+- Domain-specific terms poorly represented ("standup", "retrospective", "OKRs")
+- Cross-lingual content (if present) not well supported
+
+### Current Performance
+
+**Baseline Metrics** (100-document test corpus, 50 queries):
+- **Recall@10**: ~52% (misses 48% of relevant documents)
+- **Precision@10**: ~78% (acceptable but room for improvement)
+- **MRR**: 0.58 (relevant docs often not in top positions)
+- **Zero-result queries**: 18% (completely missing relevant content)
+
+## Decision Drivers
+
+1. **Address Root Causes**: Fix fundamental issues (chunking, embeddings) before adding complexity (reranking, hybrid search)
+2. **Measurable Impact**: Target 40-60% improvement in recall through chunking/embedding alone
+3. **Independence**: Improvements should be orthogonal to future enhancements (reranking, GraphRAG)
+4. **Cost Efficiency**: Minimize infrastructure and API costs
+5. **Reindexing Acceptable**: One-time reindex cost justified by long-term quality improvement
+
+## Options Considered
+
+### Chunking Strategies
+
+#### Option C1: Semantic Sentence-Aware Chunking (RECOMMENDED)
+
+**Description**: Respect sentence boundaries while maintaining target chunk size
+
+**Implementation**:
+```python
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+splitter = RecursiveCharacterTextSplitter(
+    chunk_size=2048,  # ~512 words in characters
+    chunk_overlap=200,  # ~50 words in characters
+    separators=["\n\n", "\n", ". ", "! ", "? ", "; ", ": ", ", ", " "],
+    length_function=len,
+)
+```
+
+**How it works**:
+1. Try splitting by paragraphs (`\n\n`)
+2. If chunks too large, split by sentences (`. `, `! `, `? `)
+3. If still too large, split by clauses (`; `, `: `, `, `)
+4. Last resort: split by words
+
+**Pros**:
+- ✅ Preserves semantic boundaries (splits mid-sentence only as a last resort)
+- ✅ Maintains context coherence within chunks
+- ✅ Simple implementation (langchain library)
+- ✅ Configurable separators for different content types
+- ✅ Proven approach (used by major RAG systems)
+
+**Cons**:
+- ❌ Variable chunk sizes (not exactly 512 words, but close)
+- ❌ Adds dependency (langchain)
+- ❌ Slightly slower than naive splitting (~10-20ms per document)
+
+**Expected Impact**: 20-30% recall improvement
+
+#### Option C2: Hierarchical Context-Preserving Chunks
+
+**Description**: Create overlapping parent/child chunks
+
+**Structure**:
+```
+Document → Large parent chunks (1024 words) → Small child chunks (256 words)
+          ↓                                    ↓
+   Stored in Qdrant                       Searched first
+                                          Return parent context
+```
+
+**Implementation**:
+```python
+# Generate child chunks (searched)
+child_chunks = splitter.split_text(content, chunk_size=1024)
+
+# Generate parent chunks (context)
+parent_chunks = splitter.split_text(content, chunk_size=4096)
+
+# Store both with parent-child relationships
+for child_idx, child in enumerate(child_chunks):
+    parent_idx = find_parent(child_idx)
+    store_vector(
+        vector=embed(child),
+        payload={
+            "chunk": child,
+            "parent_chunk": parent_chunks[parent_idx],
+            "chunk_type": "child"
+        }
+    )
+```
+
+**Pros**:
+- ✅ Best of both worlds: precise matching + full context
+- ✅ Handles multi-hop information needs
+- ✅ Better for long documents (> 1000 words)
+
+**Cons**:
+- ❌ 2x storage (parent + child chunks)
+- ❌ More complex implementation
+- ❌ Higher indexing time (embed twice)
+- ❌ Query complexity (retrieve child, return parent)
+
+**Expected Impact**: 35-45% recall improvement (diminishing returns vs. complexity)
+
+**Verdict**: ⚠️ Consider only if Option C1 insufficient
+
+#### Option C3: Document Structure-Aware Chunking
+
+**Description**: Parse markdown/document structure before chunking
+
+**Implementation**:
+```python
+import mistune  # Markdown parser
+
+def structure_aware_chunk(markdown_content: str) -> list[str]:
+    ast = mistune.create_markdown(renderer='ast')(markdown_content)
+
+    chunks = []
+    for node in ast:
+        if node['type'] == 'heading':
+            # Start new chunk at each header
+            current_chunk = node['children'][0]['raw']
+        elif node['type'] == 'paragraph':
+            current_chunk += "\n" + node['children'][0]['raw']
+            if len(current_chunk) > 2048:
+                chunks.append(current_chunk)
+                current_chunk = ""
+
+    return chunks
+```
+
+**Pros**:
+- ✅ Respects document logical structure
+- ✅ Headers provide context for chunks
+- ✅ Works well for structured notes (documentation, meeting notes with sections)
+
+**Cons**:
+- ❌ Complex implementation (parser, AST traversal)
+- ❌ Markdown-specific (doesn't help calendar events, deck cards)
+- ❌ Variable chunk sizes (some sections very short/long)
+- ❌ Breaks for unstructured content
+
+**Expected Impact**: 15-25% improvement for structured content only
+
+**Verdict**: ⚠️ Future enhancement after Option C1
+
+#### Option C4: Fixed Sliding Window (Current Baseline)
+
+**Description**: Current naive word-based splitting
+
+**Verdict**: ❌ Superseded by Option C1
+
+### Embedding Model Strategies
+
+#### Option E1: Upgrade to Better General-Purpose Model (RECOMMENDED)
+
+**Description**: Switch to state-of-the-art embedding model
+
+**Candidates**:
+
+| Model | Dimensions | MTEB Score | Pros | Cons |
+|-------|-----------|------------|------|------|
+| **mxbai-embed-large-v1** | 1024 | 64.68 | Best performance, good balance | Larger (slower) |
+| **nomic-embed-text-v1.5** | 768 | 62.39 | Upgraded version of current | Incremental improvement |
+| **bge-large-en-v1.5** | 1024 | 64.23 | Excellent for English | Not multilingual |
+| **nomic-embed-text** (current) | 768 | 60.10 | Baseline | Lower performance |
+
+**MTEB**: Massive Text Embedding Benchmark (higher = better semantic understanding)
+
+**Recommendation**: **mxbai-embed-large-v1**
+- Best MTEB score (64.68)
+- 1024 dimensions (richer semantic space)
+- Works well via Ollama
+- ~15-20% better retrieval quality in benchmarks
+
+**Implementation**:
+```python
+# config.py
+OLLAMA_EMBEDDING_MODEL = "mxbai-embed-large-v1"  # Changed from nomic-embed-text
+
+# ollama_provider.py
+async def get_dimension(self) -> int:
+    # Query Ollama for actual dimension instead of hardcoding
+    response = await self.client.post("/api/show", json={"name": self.model})
+    return response.json()["details"]["embedding_length"]
+```
+
+**Migration**:
+1. Deploy new model to Ollama
+2. Create new Qdrant collection (different dimension)
+3. Reindex all documents with new embeddings
+4. Swap collections atomically
+5. Delete old collection
+
+**Pros**:
+- ✅ Immediate quality improvement (15-20%)
+- ✅ Simple change (config + reindex)
+- ✅ No code complexity
+- ✅ Future-proof (state-of-the-art model)
+
+**Cons**:
+- ❌ Requires full reindex (2-4 hours for 1000 documents)
+- ❌ Larger model = slower embedding (~50ms vs. 30ms per chunk)
+- ❌ Higher dimensionality = more storage (~30% increase)
+
+**Expected Impact**: 15-25% recall improvement
+
+#### Option E2: Multi-Vector Embeddings (ColBERT-style)
+
+**Description**: Generate multiple embeddings per chunk (token-level)
+
+**Architecture**:
+```
+Chunk → Transformer → Token embeddings (e.g., 50 tokens × 128 dim) → Store all
+Query → Transformer → Token embeddings → MaxSim(query_tokens, doc_tokens)
+```
+
+**MaxSim scoring**:
+```python
+def maxsim_score(query_embeddings, doc_embeddings):
+    # For each query token, find max similarity with any doc token
+    scores = []
+    for q_emb in query_embeddings:
+        max_sim = max(cosine_similarity(q_emb, d_emb) for d_emb in doc_embeddings)
+        scores.append(max_sim)
+    return sum(scores)
+```
+
+**Pros**:
+- ✅ Best retrieval quality (state-of-the-art results)
+- ✅ Fine-grained matching (token-level)
+- ✅ Handles partial matches better
+
+**Cons**:
+- ❌ **50-100x storage increase** (50 vectors per chunk vs. 1)
+- ❌ **Slower search** (compute MaxSim for each candidate)
+- ❌ **Complex implementation** (custom scoring, storage schema)
+- ❌ **Requires specialized model** (ColBERTv2, not available in Ollama)
+
+**Expected Impact**: 40-50% improvement, but at very high cost
+
+**Verdict**: ❌ Too complex, too expensive for marginal gain over E1+C1
+
+#### Option E3: Fine-Tuned Domain-Specific Model
+
+**Description**: Fine-tune embedding model on Nextcloud corpus
+
+**Process**:
+1. Collect training data (query-document pairs)
+2. Fine-tune base model (e.g., `nomic-embed-text`) on domain data
+3. Deploy fine-tuned model via Ollama
+4. Reindex with fine-tuned embeddings
+
+**Training data needed**:
+- 1,000+ query-document pairs
+- Labeled relevance (positive/negative examples)
+- Representative of real usage
+
+**Pros**:
+- ✅ Optimized for specific content (notes, calendar, deck)
+- ✅ Better handling of domain terminology
+- ✅ Highest potential quality improvement (30-40%)
+
+**Cons**:
+- ❌ **Requires training data** (expensive to collect)
+- ❌ **GPU infrastructure** needed for fine-tuning
+- ❌ **Expertise required** (ML/NLP knowledge)
+- ❌ **Maintenance burden** (retrain as corpus evolves)
+- ❌ **Time investment**: 2-4 weeks initial setup
+
+**Expected Impact**: 30-40% improvement, but high cost
+
+**Verdict**: ⚠️ Consider only if E1+C1 insufficient AND have training data
+
+#### Option E4: Ensemble Embeddings
+
+**Description**: Generate embeddings with multiple models, combine scores
+
+**Implementation**:
+```python
+models = ["mxbai-embed-large-v1", "bge-large-en-v1.5"]
+
+# Index
+embeddings = [await embed(chunk, model) for model in models]
+store_multi_vector(embeddings)
+
+# Search
+query_embeddings = [await embed(query, model) for model in models]
+scores = [search(q_emb, model) for q_emb, model in zip(query_embeddings, models)]
+combined_score = 0.5 * scores[0] + 0.5 * scores[1]
+```
+
+**Pros**:
+- ✅ Robust to individual model weaknesses
+- ✅ Better coverage of semantic space
+
+**Cons**:
+- ❌ 2x storage and compute
+- ❌ Complex scoring and fusion
+- ❌ Marginal improvement (~5-10%) over single best model
+
+**Expected Impact**: 5-10% over best single model
+
+**Verdict**: ❌ Not worth complexity
+
+### Combined Strategies
+
+#### Option D1: Best Chunking + Best Embedding (RECOMMENDED)
+
+**Combination**: Option C1 (Semantic Chunking) + Option E1 (mxbai-embed-large-v1)
+
+**Expected Impact**:
+- Chunking: +20-30% recall
+- Embedding: +15-25% recall
+- **Combined**: +35-55% recall improvement (not strictly additive, but significant)
+
+**Cost**:
+- Development: 1-2 days
+- Reindex: 2-4 hours (one-time)
+- Ongoing: None (same infrastructure)
+
+**Pros**:
+- ✅ Addresses both root causes
+- ✅ Orthogonal improvements (chunking + embedding)
+- ✅ Simple implementation
+- ✅ No new infrastructure
+- ✅ Future-proof foundation for additional enhancements (reranking, hybrid search)
+
+**Cons**:
+- ❌ Requires full reindex (manageable)
+- ❌ Slightly higher storage (1024 vs. 768 dim)
+
+**Verdict**: ✅ **RECOMMENDED**
+
+## Decision
+
+**Adopt Option D1: Semantic Chunking + Upgraded Embedding Model**
+
+Implement both improvements together to maximize recall improvement:
+
+### 1. Semantic Sentence-Aware Chunking
+
+**Changes**:
+- Replace naive word splitting with `RecursiveCharacterTextSplitter`
+- Preserve sentence boundaries, paragraph structure
+- Maintain similar chunk sizes (~512 words / 2048 characters)
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/vector/document_chunker.py
+
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+class DocumentChunker:
+    """Chunk documents into semantically coherent pieces."""
+
+    def __init__(
+        self,
+        chunk_size: int = 2048,  # Characters, not words
+        chunk_overlap: int = 200,  # Characters, not words
+    ):
+        self.chunk_size = chunk_size
+        self.chunk_overlap = chunk_overlap
+
+        self.splitter = RecursiveCharacterTextSplitter(
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            separators=[
+                "\n\n",  # Paragraphs (highest priority)
+                "\n",    # Lines
+                ". ",    # Sentences
+                "! ",
+                "? ",
+                "; ",    # Clauses
+                ": ",
+                ", ",    # Phrases
+                " ",     # Words (last resort)
+            ],
+            length_function=len,
+            is_separator_regex=False,
+        )
+
+    def chunk_text(self, content: str) -> list[str]:
+        """
+        Chunk text while preserving semantic boundaries.
+
+        Args:
+            content: Full document text
+
+        Returns:
+            List of text chunks, each ending at a semantic boundary
+        """
+        if not content:
+            return []
+
+        # Use RecursiveCharacterTextSplitter for semantic boundaries
+        chunks = self.splitter.split_text(content)
+
+        return chunks
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old (word-based)
+DOCUMENT_CHUNK_SIZE: int = 512  # words
+DOCUMENT_CHUNK_OVERLAP: int = 50  # words
+
+# New (character-based, more precise)
+DOCUMENT_CHUNK_SIZE: int = 2048  # characters (~512 words)
+DOCUMENT_CHUNK_OVERLAP: int = 200  # characters (~50 words)
+```
+
+**Dependency** (`pyproject.toml`):
+```toml
+[project]
+dependencies = [
+    # ... existing dependencies
+    "langchain-text-splitters>=0.2.0",
+]
+```
+
+### 2. Upgrade Embedding Model
+
+**Changes**:
+- Switch from `nomic-embed-text` (768-dim) to `mxbai-embed-large-v1` (1024-dim)
+- Dynamic dimension detection (query Ollama instead of hardcoding)
+- Create new Qdrant collection for new dimensions
+
+**Implementation**:
+
+```python
+# nextcloud_mcp_server/embedding/ollama_provider.py
+
+class OllamaEmbeddingProvider(EmbeddingProvider):
+    def __init__(self, base_url: str, model: str, verify_ssl: bool = True):
+        self.base_url = base_url
+        self.model = model
+        self._dimension: int | None = None  # Changed: query dynamically
+        self.client = httpx.AsyncClient(base_url=base_url, verify=verify_ssl)
+
+    async def dimension(self) -> int:
+        """Get embedding dimension from Ollama API."""
+        if self._dimension is None:
+            try:
+                response = await self.client.post(
+                    "/api/show",
+                    json={"name": self.model},
+                    timeout=10.0,
+                )
+                response.raise_for_status()
+                info = response.json()
+                self._dimension = info.get("details", {}).get("embedding_length")
+
+                if self._dimension is None:
+                    # Fallback: generate test embedding to detect dimension
+                    test_emb = await self.embed("test")
+                    self._dimension = len(test_emb)
+
+            except Exception as e:
+                logger.warning(f"Failed to get dimension from Ollama: {e}, using fallback")
+                # Fallback dimensions by model name
+                if "mxbai-embed-large" in self.model:
+                    self._dimension = 1024
+                elif "nomic-embed-text" in self.model:
+                    self._dimension = 768
+                else:
+                    self._dimension = 768  # Default
+
+        return self._dimension
+```
+
+**Configuration Changes** (`config.py`):
+```python
+# Old
+OLLAMA_EMBEDDING_MODEL: str = "nomic-embed-text"
+
+# New
+OLLAMA_EMBEDDING_MODEL: str = "mxbai-embed-large-v1"
+```
+
+**Environment Variable**:
+```bash
+OLLAMA_EMBEDDING_MODEL=mxbai-embed-large-v1
+```
+
+### 3. Migration Strategy
+
+**Reindexing Process**:
+
+```python
+# nextcloud_mcp_server/vector/migration.py
+
+async def migrate_to_new_embeddings():
+    """
+    Migrate from old embeddings to new embeddings.
+
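+    Process:
+    1. Create new collection with new dimension
+    2. Reindex all documents with new embeddings
+    3. Wait for the reindex to complete
+    4. Atomic swap (point the collection alias at the new collection)
+    5. Verify with benchmark queries (roll back on regression)
+    6. Delete old collection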
+    """
+    old_collection = "nextcloud_content"
+    new_collection = "nextcloud_content_v2"
+
+    # 1. Create new collection
+    await qdrant_client.create_collection(
+        collection_name=new_collection,
+        vectors_config=VectorParams(
+            size=1024,  # mxbai-embed-large-v1 dimension
+            distance=Distance.COSINE,
+        ),
+    )
+
+    # 2. Reindex all documents
+    logger.info("Starting reindex with new embeddings...")
+    scanner = VectorScanner(...)
+    processor = VectorProcessor(collection_name=new_collection, ...)
+
+    await scanner.scan_all()  # Rescans and re-embeds all documents
+
+    # 3. Wait for completion
+    while True:
+        status = await get_sync_status()
+        if status.pending_documents == 0:
+            break
+        await asyncio.sleep(5)
+
+    # 4. Atomic swap
+    # Update config to point to new collection
+    # (or use collection alias in Qdrant)
+    await qdrant_client.update_collection_aliases(
+        change_aliases_operations=[
+            CreateAliasOperation(
+                create_alias=CreateAlias(
+                    collection_name=new_collection,
+                    alias_name="nextcloud_content"
+                )
+            )
+        ]
+    )
+
+    # 5. Verify new collection works
+    test_results = await run_benchmark_queries()
+    if test_results.recall < baseline_recall:
+        # Rollback
+        logger.error("New embeddings worse than baseline, rolling back")
+        await rollback_migration()
+        return False
+
+    # 6. Delete old collection
+    await qdrant_client.delete_collection(old_collection)
+    logger.info("Migration complete!")
+    return True
+```
+
+**Downtime Mitigation**:
+- Use Qdrant collection aliases for atomic swap
+- Reindex can happen in background
+- Only brief downtime during alias swap (~1s)
+
+**Rollback Plan**:
+- Keep old collection until validation complete
+- If new embeddings worse, swap alias back to old collection
+- No data loss
+
+### 4. Validation & Benchmarking
+
+**Before/After Comparison**:
+
+```python
+# tests/benchmarks/chunking_embedding_comparison.py
+
+async def benchmark_chunking_embeddings():
+    """
+    Compare old vs. new chunking and embeddings on test queries.
+    """
+    test_queries = load_benchmark_queries()  # 100 queries with known relevant docs
+
+    # Baseline (current)
+    baseline_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content",  # Old: nomic-embed-text, word chunks
+    )
+
+    # New implementation
+    new_results = await run_queries(
+        queries=test_queries,
+        collection="nextcloud_content_v2",  # New: mxbai-embed-large-v1, semantic chunks
+    )
+
+    # Compute the headline metrics once so the improvement ratios can reuse them
+    baseline_recall = calculate_recall(baseline_results, k=10)
+    baseline_precision = calculate_precision(baseline_results, k=10)
+    new_recall = calculate_recall(new_results, k=10)
+    new_precision = calculate_precision(new_results, k=10)
+
+    # Compare metrics
+    comparison = {
+        "baseline": {
+            "recall@10": baseline_recall,
+            "precision@10": baseline_precision,
+            "mrr": calculate_mrr(baseline_results),
+            "zero_result_rate": calculate_zero_result_rate(baseline_results),
+        },
+        "new": {
+            "recall@10": new_recall,
+            "precision@10": new_precision,
+            "mrr": calculate_mrr(new_results),
+            "zero_result_rate": calculate_zero_result_rate(new_results),
+        },
+        "improvement": {
+            "recall_improvement": (new_recall - baseline_recall) / baseline_recall,
+            "precision_improvement": (new_precision - baseline_precision) / baseline_precision,
+        }
+    }
+
+    return comparison
+```
+
+**Success Criteria**:
+- **Recall@10**: Improve from ~52% to ≥75% (+40% improvement)
+- **Precision@10**: Maintain ≥75% (no degradation)
+- **MRR**: Improve from 0.58 to ≥0.70
+- **Zero-result rate**: Reduce from 18% to ≤10%
+- **Indexing time**: Maintain ≤10s per document
+
+**Validation Process**:
+1. Run benchmark on baseline (current implementation)
+2. Implement changes
+3. Run benchmark on new implementation
+4. Compare metrics
+5. If improvement ≥40%, proceed to production
+6. If improvement <40%, investigate and iterate
+
+## Implementation Timeline
+
+### Week 1: Development & Testing
+
+**Day 1-2: Chunking Implementation**
+- [ ] Add langchain-text-splitters dependency
+- [ ] Refactor `document_chunker.py`
+- [ ] Update configuration (character-based chunk sizes)
+- [ ] Write unit tests for semantic boundaries
+- [ ] Validate: Chunks never break mid-sentence
+
+**Day 3-4: Embedding Implementation**
+- [ ] Update `ollama_provider.py` with dynamic dimension detection
+- [ ] Update configuration (new model name)
+- [ ] Deploy `mxbai-embed-large-v1` to Ollama
+- [ ] Test embedding generation with new model
+- [ ] Validate: Embeddings are 1024-dim
+
+**Day 5: Migration Script**
+- [ ] Write migration script (collection creation, reindexing, alias swap)
+- [ ] Test migration on staging environment
+- [ ] Validate: No data loss, atomic swap works
+
+### Week 2: Reindexing & Validation
+
+**Day 1-2: Staging Reindex**
+- [ ] Run full reindex on staging environment
+- [ ] Monitor indexing performance
+- [ ] Validate: All documents indexed correctly
+
+**Day 3: Benchmarking**
+- [ ] Run benchmark queries on old collection (baseline)
+- [ ] Run benchmark queries on new collection
+- [ ] Compare metrics (recall, precision, MRR)
+- [ ] Validate: ≥40% recall improvement
+
+**Day 4: Production Reindex**
+- [ ] Schedule maintenance window (optional, can run in background)
+- [ ] Run migration script on production
+- [ ] Monitor reindexing progress
+- [ ] Atomic swap when complete
+
+**Day 5: Production Validation**
+- [ ] Monitor search quality metrics
+- [ ] Collect user feedback
+- [ ] Compare production metrics to staging
+- [ ] Rollback if issues detected
+
+## Cost Analysis
+
+### Development Cost
+- **Time**: 1-2 weeks (implementation + validation)
+- **Effort**: 40-60 hours @ $100/hour = $4,000 - $6,000
+
+### Infrastructure Cost
+- **Storage**: +30% (1024-dim vs. 768-dim)
+  - Example: 1,000 notes × 3 chunks × 1024 dim × 4 bytes = 12 MB (negligible)
+- **Compute**: ~+67% embedding time (50ms vs. 30ms per chunk)
+  - Amortized over batch indexing, minimal impact
+- **No new infrastructure**: Uses existing Ollama + Qdrant
+
+### Reindexing Cost (One-Time)
+- **Time**: 2-4 hours for 1,000 documents
+  - 1,000 docs × 3 chunks × 50ms = 150 seconds (~2.5 minutes embedding)
+  - + Ollama processing time + Qdrant insertion
+- **Downtime**: ~1 second (atomic alias swap)
+
+### Total Cost
+- **Initial**: $4,000 - $6,000 (development + testing)
+- **Ongoing**: $0 (no new infrastructure or API costs)
+
+### ROI
+- **Recall improvement**: +40-60% (finding relevant documents)
+- **User satisfaction**: Reduced zero-result queries (18% → 10%)
+- **Foundation**: Enables future enhancements (reranking, hybrid search)
+- **Cost per % improvement**: $100 - $150 (excellent ROI)
+
+## Consequences
+
+### Positive
+
+1. **Addresses Root Causes**: Fixes fundamental issues (chunking, embeddings) not symptoms
+2. **High Impact**: Expected 40-60% recall improvement from foundational changes
+3. **Future-Proof**: Creates solid foundation for future enhancements (reranking, hybrid search, GraphRAG)
+4. **Simple**: No architectural changes, no new infrastructure
+5. **Orthogonal**: Improvements are independent, can be validated separately
+6. **Low Risk**: Proven techniques (RecursiveCharacterTextSplitter, mxbai-embed-large-v1)
+7. **Maintainable**: Standard libraries and models, easy to debug
+
+### Negative
+
+1. **Reindexing Required**: 2-4 hours one-time cost (manageable, can run in background)
+2. **Storage Increase**: +30% for higher-dimensional embeddings (12 MB vs. 9 MB for 1K docs)
+3. **Slower Indexing**: ~+67% embedding time (50ms vs. 30ms per chunk)
+4. **Dependency**: Adds langchain-text-splitters (minimal, well-maintained library)
+5. **Not a Complete Solution**: May still need reranking/hybrid search for optimal recall (but solid foundation)
+
+### Neutral
+
+1. **Model Lock-In**: Committed to mxbai-embed-large-v1, but can change later (another reindex)
+2. **Chunk Size Trade-offs**: ~512 words is heuristic, may need tuning for specific content types
+
+## Monitoring & Success Metrics
+
+### Real-Time Metrics (Grafana)
+
+**Search Quality**:
+- `semantic_search_recall_at_10` (target: ≥75%)
+- `semantic_search_precision_at_10` (target: ≥75%)
+- `semantic_search_mrr` (target: ≥0.70)
+- `semantic_search_zero_result_rate` (target: ≤10%)
+
+**Performance**:
+- `semantic_search_latency_ms` (p50, p95, p99)
+- `embedding_generation_time_ms`
+- `indexing_throughput_docs_per_sec`
+
+**Indexing**:
+- `documents_indexed_total`
+- `documents_pending`
+- `indexing_errors_total`
+
+### Weekly Validation
+
+**A/B Testing** (if gradual rollout):
+- 50% users: New embeddings
+- 50% users: Old embeddings
+- Compare metrics for 1 week
+- Full rollout if new embeddings superior
+
+**User Feedback**:
+- Survey: "How satisfied are you with search results?" (1-5 scale)
+- Track: Number of "search not working" support tickets
+- Monitor: User-reported false negatives ("I know this doc exists")
+
+### Rollback Criteria
+
+**Automatic Rollback** if:
+- Recall decreases by >10% from baseline
+- Error rate increases by >50%
+- Query latency increases by >100%
+
+**Manual Rollback** if:
+- User complaints increase significantly
+- Zero-result queries increase instead of decrease
+
+## Future Enhancements
+
+These improvements create a solid foundation. Future enhancements (in order of priority):
+
+1. **Cross-Encoder Reranking** (ADR-012)
+   - Two-stage retrieval: broad recall (50 candidates) → precise reranking (top 10)
+   - Expected: +15-20% additional recall improvement
+   - Builds on: Better embeddings retrieve better candidates to rerank
+
+2. **Hybrid Search** (ADR-013)
+   - Combine vector search + BM25 keyword search
+   - Expected: +10-15% additional recall (especially for exact matches)
+   - Builds on: Semantic chunks provide better keyword match context
+
+3. **Multi-App Indexing** (ADR-014)
+   - Index calendar, deck, files (currently notes-only)
+   - Expected: Expands searchable corpus 3-5x
+   - Builds on: Proven chunking and embedding strategy
+
+4. **GraphRAG** (ADR-015, conditional)
+   - Only if: Global thematic queries needed OR corpus >10K documents
+   - Expected: Relationship discovery, multi-hop reasoning
+   - Builds on: High-quality embeddings improve graph construction
+
+## References
+
+### Research Papers
+
+1. **RecursiveCharacterTextSplitter**
+   - LangChain Documentation: https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter
+   - Proven technique used by major RAG systems
+
+2. **MTEB Leaderboard** (Massive Text Embedding Benchmark)
+   - https://huggingface.co/spaces/mteb/leaderboard
+   - Comprehensive embedding model comparison
+
+3. **mxbai-embed-large**
+   - Model: https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1
+   - Best general-purpose embedding model (MTEB: 64.68)
+
+### Related ADRs
+
+- **ADR-003**: Vector Database and Semantic Search Architecture (original implementation)
+- **ADR-008**: MCP Sampling for Multi-App Semantic Search with RAG (answer generation)
+
+### Tools & Libraries
+
+- **LangChain Text Splitters**: https://python.langchain.com/docs/modules/data_connection/document_transformers/
+- **Ollama Embedding Models**: https://ollama.ai/library
+- **Qdrant Collections**: https://qdrant.tech/documentation/concepts/collections/
+
+## Summary
+
+This ADR addresses the root causes of poor semantic search recall:
+
+1. **Better Chunking**: Semantic sentence-aware splitting (preserves context)
+2. **Better Embeddings**: Upgrade to mxbai-embed-large-v1 (richer semantic space)
+
+**Expected Impact**: 40-60% recall improvement with minimal cost and complexity.
+
+**Why This Approach**:
+- Fixes fundamentals before adding complexity
+- Proven techniques (not experimental)
+- Simple implementation (1-2 weeks)
+- Creates foundation for future enhancements
+- No new infrastructure or ongoing costs
+
+**Next Steps**: Approve ADR → Implement changes → Reindex → Validate → Production rollout
@@ -0,0 +1,619 @@
+# ADR-012: Unified Multi-Algorithm Search with Client-Configurable Weighting
+
+## Status
+Proposed
+
+## Context
+
+### Current State
+
+The Nextcloud MCP server currently provides semantic search via vector similarity (Qdrant), as designed in ADR-003 and implemented through ADR-007. However, users and MCP clients have limited control over search behavior:
+
+1. **Single algorithm only**: Only pure vector similarity search is available
+2. **No algorithm selection**: MCP clients cannot choose between semantic, keyword, or fuzzy approaches
+3. **No weighting control**: Clients cannot adjust the balance between different search methods
+4. **Disconnected implementations**: Viz pane uses different search algorithms than MCP tools
+5. **Limited flexibility**: No way to optimize search for different use cases (exact match vs. conceptual similarity)
+
+### User Needs
+
+Different search scenarios require different algorithms:
+
+- **Exact match queries**: "Find note titled 'Q1 Budget'" → keyword search preferred
+- **Conceptual queries**: "What are my goals for next quarter?" → semantic search preferred
+- **Typo-tolerant queries**: "Find note about kuberntes" → fuzzy search needed
+- **Balanced queries**: "Find documentation about API endpoints" → hybrid search optimal
+
+Additionally, users need a **testing interface** (viz pane) to:
+- Experiment with different search algorithms on their own documents
+- Visualize search results and algorithm behavior
+- Tune weights for optimal results
+- Understand which algorithm works best for their queries
+
+### Technical Requirements
+
+1. **Unified interface**: Single MCP tool supporting multiple algorithms
+2. **Client control**: MCP clients specify algorithm and weights via tool parameters
+3. **Backward compatibility**: Existing `nc_semantic_search()` behavior preserved
+4. **Shared implementation**: Viz pane and MCP tools use identical search algorithms
+5. **User accessibility**: Viz pane available to all logged-in users with vector sync enabled
+6. **Performance**: Minimal overhead for algorithm selection
+
+## Decision
+
+We will implement a **unified multi-algorithm search architecture** with the following components:
+
+### Architecture Diagram
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                         MCP Client / User Browser                            │
+│                                                                               │
+│  ┌──────────────────────────┐         ┌──────────────────────────────────┐  │
+│  │   MCP Tool Call          │         │   Viz Pane (Browser UI)          │  │
+│  │                          │         │                                  │  │
+│  │ nc_semantic_search(      │         │ - Algorithm selector dropdown    │  │
+│  │   query="kubernetes",    │         │ - Weight adjustment sliders      │  │
+│  │   algorithm="hybrid",    │         │ - Interactive 2D scatter plot    │  │
+│  │   semantic_weight=0.5,   │         │ - Side-by-side comparison        │  │
+│  │   keyword_weight=0.3,    │         │ - Real-time search testing       │  │
+│  │   fuzzy_weight=0.2       │         │                                  │  │
+│  │ )                        │         │                                  │  │
+│  └───────────┬──────────────┘         └────────────┬─────────────────────┘  │
+└──────────────┼─────────────────────────────────────┼────────────────────────┘
+               │                                      │
+               │ MCP Protocol                         │ HTTPS (htmx)
+               │                                      │
+┌──────────────▼──────────────────────────────────────▼────────────────────────┐
+│                        MCP Server (/app endpoint)                             │
+│                                                                               │
+│  ┌─────────────────────────────────────────────────────────────────────────┐ │
+│  │              Unified Search Interface (server/semantic.py)              │ │
+│  │                                                                         │ │
+│  │  @mcp.tool() nc_semantic_search(algorithm, weights...)                 │ │
+│  │  ├─ Validate parameters (weights sum ≤1.0)                             │ │
+│  │  ├─ Dispatch to algorithm selector                                     │ │
+│  │  └─ Return ranked SearchResponse                                       │ │
+│  └────────────────────────────┬────────────────────────────────────────────┘ │
+│                                │                                              │
+│  ┌────────────────────────────▼────────────────────────────────────────────┐ │
+│  │              Algorithm Dispatcher (search/algorithms.py)                │ │
+│  │                                                                         │ │
+│  │  if algorithm == "semantic":    → semantic.py                          │ │
+│  │  if algorithm == "keyword":     → keyword.py                           │ │
+│  │  if algorithm == "fuzzy":       → fuzzy.py                             │ │
+│  │  if algorithm == "hybrid":      → hybrid.py (RRF fusion)               │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+│                                                                               │
+│  ┌──────────────────┐  ┌──────────────────┐  ┌──────────────────┐           │
+│  │  semantic.py     │  │  keyword.py      │  │  fuzzy.py        │           │
+│  │                  │  │                  │  │                  │           │
+│  │ • Query Qdrant   │  │ • Token matching │  │ • Char overlap   │           │
+│  │ • Cosine dist    │  │ • Title weight   │  │ • 70% threshold  │           │
+│  │ • Score ≥0.7     │  │ • ADR-001 logic  │  │ • Simple impl    │           │
+│  └────────┬─────────┘  └────────┬─────────┘  └────────┬─────────┘           │
+│           │                     │                      │                     │
+│           └─────────────────────┼──────────────────────┘                     │
+│                                 │                                            │
+│  ┌──────────────────────────────▼──────────────────────────────────────────┐ │
+│  │                    hybrid.py (Reciprocal Rank Fusion)                   │ │
+│  │                                                                         │ │
+│  │  1. Run algorithms in parallel (semantic, keyword, fuzzy)              │ │
+│  │  2. Collect ranked results from each                                   │ │
+│  │  3. Apply RRF formula: score = weight / (k + rank)                     │ │
+│  │  4. Combine scores across algorithms                                   │ │
+│  │  5. Re-rank by combined score                                          │ │
+│  └─────────────────────────────────────────────────────────────────────────┘ │
+└───────────────────────────────────┬───────────────────────────────────────────┘
+                                    │
+                    ┌───────────────┴───────────────┐
+                    │                               │
+         ┌──────────▼──────────┐         ┌─────────▼────────────┐
+         │ Qdrant Vector DB    │         │ Nextcloud APIs       │
+         │                     │         │                      │
+         │ • Vector search     │         │ • Access verification│
+         │ • user_id filter    │         │ • Full metadata fetch│
+         │ • Score threshold   │         │ • Permission checks  │
+         │ • 768-dim embeddings│         │                      │
+         └─────────────────────┘         └──────────────────────┘
+```
+
+### Data Flow
+
+#### MCP Tool Request
+```
+1. Client calls nc_semantic_search(query, algorithm="hybrid", weights...)
+2. Server validates parameters (weights sum ≤1.0)
+3. Dispatcher routes to hybrid.py
+4. Hybrid search runs semantic, keyword, fuzzy in parallel
+5. RRF combines results with weighted scores
+6. Access verification via Nextcloud API
+7. Return ranked SearchResponse to client
+```
+
+#### Viz Pane Request (Server-Side Processing)
+```
+1. User navigates to /app (Vector Visualization tab)
+2. Browser loads vector-viz fragment via htmx
+3. User enters query and adjusts algorithm/weights
+4. htmx sends request to /app/vector-viz endpoint
+5. Server executes search via search/algorithms.py:
+   - Filters by user_id (multi-tenant security)
+   - Applies selected algorithm (semantic/keyword/fuzzy/hybrid)
+   - Filters by document type (notes/files/calendar/contacts)
+   - Retrieves matching results + metadata
+6. Server performs PCA reduction (768-dim → 2D):
+   - Converts matching results to 2D coordinates
+   - Only sends coordinates + metadata (not full vectors)
+   - Dramatically reduces bandwidth (e.g., 768 floats → 2 floats per doc)
+7. Server returns JSON: {results: [...], coordinates_2d: [...], stats: {...}}
+8. Browser receives lightweight response
+9. Plotly.js renders interactive scatter plot
+10. Matching results highlighted (blue), non-matches grayed (40% opacity)
+```
+
+**Performance Benefits of Server-Side Processing**:
+- **Bandwidth reduction**: ~384x less data (2 floats vs 768 floats per document)
+- **Client efficiency**: Browser only handles visualization, not computation
+- **Scalability**: Can visualize 10,000+ documents without client-side lag
+- **Security**: Raw vectors never leave server
+- **Consistency**: Same search logic as MCP tool (no drift)
+
+### 1. Core Search Algorithms
+
+Four search algorithms will be available:
+
+#### a) Semantic Search (Vector Similarity)
+- **Method**: Cosine distance in 768-dimensional embedding space
+- **Implementation**: Qdrant `query_points` with user_id filtering
+- **Use case**: Conceptual queries, finding related content
+- **Current status**: Implemented in `nextcloud_mcp_server/server/semantic.py`
+
+#### b) Keyword Search (Token-Based)
+- **Method**: Token matching with weighted scoring (from ADR-001)
+- **Implementation**: Title matches weighted 3x higher than content
+- **Use case**: Exact phrase matching, known titles
+- **Current status**: Designed in ADR-001, not implemented
+
+#### c) Fuzzy Search (Character Overlap)
+- **Method**: Simple character-based similarity (70% threshold)
+- **Implementation**: Character set comparison (current viz pane approach)
+- **Use case**: Typo tolerance, approximate matching
+- **Current status**: Implemented in viz pane only
+
+#### d) Hybrid Search (Multi-Algorithm Fusion)
+- **Method**: Reciprocal Rank Fusion (RRF) from ADR-003
+- **Implementation**: Parallel execution + score combination
+- **Use case**: Balanced queries, general-purpose search
+- **Current status**: Designed in ADR-003, not implemented
+
+### 2. Unified MCP Tool Interface
+
+```python
+@mcp.tool()
+@require_scopes("semantic:read")
+async def nc_semantic_search(
+    query: str,
+    ctx: Context,
+    limit: int = 10,
+    score_threshold: float = 0.7,
+    algorithm: Literal["semantic", "keyword", "fuzzy", "hybrid"] = "hybrid",
+    semantic_weight: float = 0.5,
+    keyword_weight: float = 0.3,
+    fuzzy_weight: float = 0.2,
+) -> SearchResponse:
+    """
+    Search Nextcloud content using configurable algorithms.
+
+    Args:
+        query: Natural language search query
+        ctx: MCP context for authentication
+        limit: Maximum results to return
+        score_threshold: Minimum similarity score (semantic/hybrid only)
+        algorithm: Search algorithm to use
+        semantic_weight: Weight for semantic results (hybrid only, default: 0.5)
+        keyword_weight: Weight for keyword results (hybrid only, default: 0.3)
+        fuzzy_weight: Weight for fuzzy results (hybrid only, default: 0.2)
+
+    Returns:
+        Ranked search results with scores and excerpts
+    """
+```
+
+**Key decisions**:
+- **Single tool name**: Keep `nc_semantic_search` for backward compatibility
+- **Algorithm parameter**: Explicit selection via enum
+- **Weight parameters**: Client-configurable, only apply to hybrid mode
+- **Validation**: Weights must sum to ≤1.0, enforced server-side
+- **Defaults**: Hybrid mode with balanced weights (semantic 50%, keyword 30%, fuzzy 20%)
+
+### 3. Shared Algorithm Implementation
+
+Extract search algorithms into reusable module:
+
+```
+nextcloud_mcp_server/
+├── search/
+│   ├── __init__.py
+│   ├── algorithms.py          # Core search implementations
+│   ├── semantic.py             # Vector similarity search
+│   ├── keyword.py              # Token-based search (ADR-001)
+│   ├── fuzzy.py                # Character overlap search
+│   └── hybrid.py               # RRF fusion (ADR-003)
+└── server/
+    └── semantic.py             # MCP tool wrapper
+```
+
+**Benefits**:
+- Viz pane and MCP tools share identical implementations
+- Testable in isolation
+- Easy to add new algorithms (e.g., BM25, neural reranking)
+- Clear separation of concerns
+
+### 4. Viz Pane Integration
+
+Update viz pane (`nextcloud_mcp_server/auth/userinfo_routes.py`) to:
+
+1. **Use shared algorithms**: Import from `search/algorithms.py`
+2. **Server-side filtering**: All search and filtering operations happen server-side
+   - Query execution via shared search backend
+   - Document type filtering (notes, files, calendar, contacts)
+   - User ID filtering for multi-tenant security
+   - Only matching results + metadata sent to client
+   - Reduces bandwidth and improves performance
+3. **PCA reduction**: Server performs dimensionality reduction (768-dim → 2D)
+   - Only 2D coordinates sent to browser for visualization
+   - Dramatically reduces data transfer vs sending full vectors
+   - Enables visualization of large document collections
+4. **User accessibility**: Available to all users with vector sync enabled
+5. **Security**: Filter results by `user_id` (only show user's own documents)
+6. **Interactive testing**: Allow users to:
+   - Select algorithm type
+   - Adjust weights (hybrid mode)
+   - Compare results across algorithms
+   - Visualize result distribution in 2D space
+
+#### Viz Pane UI Components
+
+```
+┌────────────────────────────────────────────────────────────────────────┐
+│ Vector Visualization                                          [Status] │
+├────────────────────────────────────────────────────────────────────────┤
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Configuration                                             │  │
+│ │                                                                  │  │
+│ │ Query: [_______________________________________________] [Search]│  │
+│ │                                                                  │  │
+│ │ Algorithm: [Hybrid ▼]  [Semantic] [Keyword] [Fuzzy]             │  │
+│ │                                                                  │  │
+│ │ Weights (Hybrid Mode):                                           │  │
+│ │   Semantic: [========50========] 0.5                             │  │
+│ │   Keyword:  [======30======    ] 0.3                             │  │
+│ │   Fuzzy:    [====20====        ] 0.2                             │  │
+│ │                                                                  │  │
+│ │ Document Types: ☑ Notes  ☑ Files  ☑ Calendar  ☑ Contacts        │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Vector Space Visualization (PCA 2D Projection)                   │  │
+│ │                                                                  │  │
+│ │        ▲                                                         │  │
+│ │    PC2 │     ●  ● ●      🔵 Matching results (full opacity)     │  │
+│ │        │  ●     ●  ●     ⚪ Non-matching results (40% opacity)   │  │
+│ │        │    🔵  ● ●                                              │  │
+│ │        │  ●  🔵  ●       Hover: Show document title + excerpt    │  │
+│ │        │  ● ●  🔵 ●      Click: Open document in Nextcloud       │  │
+│ │    ────┼──●─🔵──●─●────► PC1                                     │  │
+│ │        │   ● ●  ●                                                │  │
+│ │        │    🔵 ●   ●     Explained Variance:                     │  │
+│ │        │  ●    ●  ●      PC1: 23.4% | PC2: 18.7%                 │  │
+│ │        │     ● ●                                                 │  │
+│ │                                                                  │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Search Results (12 matching documents)                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Kubernetes Setup Guide                        Score: 0.87     │  │
+│ │    "...configure kubectl to connect to cluster..."              │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 Container Orchestration Notes                 Score: 0.82     │  │
+│ │    "...deployment strategies for kubernetes..."                 │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ 🔵 K8s Troubleshooting                           Score: 0.79     │  │
+│ │    "...common kuberntes errors and solutions..."                │  │
+│ │    [Open in Nextcloud]                                           │  │
+│ │                                                                  │  │
+│ │ [Show More Results...]                                           │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+│                                                                        │
+│ ┌──────────────────────────────────────────────────────────────────┐  │
+│ │ Algorithm Performance Comparison                                 │  │
+│ │                                                                  │  │
+│ │ Algorithm    │ Results │ Avg Score │ Time (ms) │ Precision     │  │
+│ │ ─────────────┼─────────┼───────────┼───────────┼───────────     │  │
+│ │ Semantic     │   45    │   0.78    │   145ms   │  ████░ 0.82   │  │
+│ │ Keyword      │   23    │   0.91    │    42ms   │  ███░░ 0.67   │  │
+│ │ Fuzzy        │   67    │   0.72    │    89ms   │  ██░░░ 0.45   │  │
+│ │ Hybrid (RRF) │   52    │   0.84    │   198ms   │  █████ 0.89   │  │
+│ └──────────────────────────────────────────────────────────────────┘  │
+└────────────────────────────────────────────────────────────────────────┘
+```
+
+**Key UI Features**:
+
+1. **Search Input**: Real-time query testing with instant visualization
+2. **Algorithm Selector**: Dropdown + quick-select buttons
+3. **Weight Sliders**: Visual adjustment with live preview (hybrid mode only)
+4. **Document Type Filters**: Checkboxes for notes, files, calendar, contacts
+5. **2D Scatter Plot**: Interactive Plotly.js visualization
+   - Blue dots = matching documents (full opacity)
+   - Gray dots = non-matching documents (40% opacity)
+   - Hover = show title + excerpt tooltip
+   - Click = open document in Nextcloud
+   - Zoom/pan controls for exploration
+6. **Results Panel**: Ranked list with scores and excerpts
+7. **Performance Table**: Compare algorithm speed and accuracy
+8. **Explained Variance**: Show how much information PCA preserves
+
+**Technology Stack**:
+- **Frontend**: htmx for dynamic loading, Alpine.js for reactivity
+- **Visualization**: Plotly.js for interactive scatter plots
+- **Styling**: Tailwind CSS (consistent with existing /app UI)
+- **Backend**: Shared `search/algorithms.py` implementation
+
+### 5. Reciprocal Rank Fusion (RRF) for Hybrid Search
+
+Following ADR-003's design:
+
+```python
+def reciprocal_rank_fusion(
+    results: dict[str, list[SearchResult]],
+    weights: dict[str, float],
+    k: int = 60
+) -> list[SearchResult]:
+    """
+    Combine multiple ranked result lists using RRF.
+
+    Args:
+        results: Dict of algorithm_name -> ranked results
+        weights: Dict of algorithm_name -> weight (0-1)
+        k: RRF constant (default: 60, standard value)
+
+    Returns:
+        Combined and re-ranked results
+    """
+    scores = defaultdict(float)
+
+    for algo_name, algo_results in results.items():
+        weight = weights.get(algo_name, 0.0)
+        for rank, result in enumerate(algo_results, start=1):
+            # Weighted RRF contribution: weight / (k + rank)
+            rrf_score = weight / (k + rank)
+            scores[result.doc_id] += rrf_score
+
+    # Sort by combined score, highest first (all (doc_id, score) pairs, no truncation)
+    return sorted(scores.items(), key=lambda x: x[1], reverse=True)
+```
+
+**RRF properties**:
+- **Rank-based**: Uses position, not raw scores (handles score scale differences)
+- **Proven effective**: Standard approach in information retrieval
+- **Configurable**: `k` parameter controls rank decay (default: 60)
+- **Weight support**: Allows algorithm-specific importance
+
+## Implementation Plan
+
+### Phase 1: Extract and Unify Algorithms (Week 1)
+
+1. Create `nextcloud_mcp_server/search/` module
+2. Implement `algorithms.py` with base interface
+3. Extract semantic search logic from `server/semantic.py`
+4. Implement keyword search from ADR-001 design
+5. Extract fuzzy search from viz pane
+6. Implement RRF hybrid search from ADR-003
+7. Add comprehensive unit tests for each algorithm
+
+### Phase 2: Update MCP Tool (Week 1-2)
+
+1. Add `algorithm` parameter to `nc_semantic_search()`
+2. Add weight parameters (`semantic_weight`, etc.)
+3. Implement algorithm dispatcher
+4. Add parameter validation (weights sum ≤1.0)
+5. Update response model to include algorithm metadata
+6. Maintain backward compatibility (default: hybrid)
+7. Add integration tests for all algorithm modes
+
+### Phase 3: Update Viz Pane (Week 2)
+
+**Critical: All processing must happen server-side**
+
+1. **Remove client-side search filtering**
+   - Delete JavaScript-based keyword/fuzzy matching
+   - Remove client-side document type filtering
+   - No search logic in browser
+2. **Implement server-side endpoint** (`/app/vector-viz`)
+   - Accept query, algorithm, weights, doc_type filters
+   - Execute search via `search/algorithms.py`
+   - Filter results by user_id (security)
+   - Perform PCA reduction (768-dim → 2D)
+   - Return JSON with 2D coordinates + metadata only
+3. **Update frontend**
+   - htmx form submission to `/app/vector-viz`
+   - Algorithm selector dropdown
+   - Weight adjustment sliders (htmx updates on change)
+   - Document type checkboxes
+   - Plotly.js visualization of server response
+4. **Performance optimization**
+   - Limit results to user's documents only
+   - Cache PCA transformation (invalidate on new vectors)
+   - Stream large result sets if needed
+   - Add loading indicators for server processing
+
+### Phase 4: Documentation and Testing (Week 2-3)
+
+1. Update MCP tool documentation
+2. Add algorithm selection guide
+3. Document weight tuning recommendations
+4. Add end-to-end tests (MCP + viz pane)
+5. Performance benchmarks for each algorithm
+6. Update CLAUDE.md with search patterns
+
+## Consequences
+
+### Positive
+
+1. **Flexibility**: MCP clients can optimize search for their use case
+2. **Unified implementation**: Single source of truth for search algorithms
+3. **User empowerment**: Viz pane enables query testing and tuning
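+4. **Backward compatible**: Existing tool name and parameters are preserved; the default changes to hybrid, and pure semantic search remains available by selecting the semantic algorithm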
+5. **Extensible**: Easy to add new algorithms (BM25, neural reranking)
+6. **Testable**: Each algorithm can be unit tested independently
+7. **Standards-based**: RRF is proven in production systems
+
+### Negative
+
+1. **Complexity**: More parameters for clients to understand
+2. **API surface**: Larger tool signature (8 parameters)
+3. **Performance**: Hybrid search requires multiple queries
+4. **Validation overhead**: Weight validation adds processing
+5. **Documentation burden**: Need to explain when to use each algorithm
+
+### Neutral
+
+1. **Weight defaults**: May need tuning based on user feedback
+2. **Algorithm performance**: Will vary by content type and query
+3. **Viz pane adoption**: Unknown if users will utilize testing interface
+
+## Alternatives Considered
+
+### Alternative 1: Separate Tools Per Algorithm
+
+```python
+@mcp.tool()
+async def nc_semantic_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure vector similarity search."""
+
+@mcp.tool()
+async def nc_keyword_search(query: str, ctx: Context, ...) -> SearchResponse:
+    """Pure keyword matching."""
+
+@mcp.tool()
+async def nc_hybrid_search(query: str, ctx: Context, weights: dict, ...) -> SearchResponse:
+    """Hybrid search with weights."""
+```
+
+**Rejected because**:
+- API proliferation (3+ tools instead of 1)
+- Harder to discover capabilities
+- Backward compatibility issues
+- DRY violation (repeated parameters)
+
+### Alternative 2: Server-Wide Configuration Only
+
+```python
+# .env configuration
+SEARCH_ALGORITHM=hybrid
+SEMANTIC_WEIGHT=0.5
+KEYWORD_WEIGHT=0.3
+FUZZY_WEIGHT=0.2
+```
+
+**Rejected because**:
+- No per-query flexibility
+- MCP clients cannot optimize for different tasks
+- Requires server restart for changes
+- User's requirement: "expose a way for users to override the default weights"
+
+### Alternative 3: Production-Grade Fuzzy (Levenshtein/RapidFuzz)
+
+**Rejected because**:
+- Adds external dependency
+- Simple character overlap performs adequately
+- Can always upgrade later if needed
+- User's preference: "Keep simple character overlap"
+
+## Related ADRs
+
+- **ADR-001**: Enhanced Note Search (keyword algorithm design)
+- **ADR-003**: Vector Database and Semantic Search (hybrid search + RRF design)
+- **ADR-007**: Background Vector Sync (semantic search implementation)
+- **ADR-008**: MCP Sampling for RAG (uses semantic search results)
+- **ADR-009**: Semantic Search OAuth Scope (security model)
+- **ADR-011**: Improving Semantic Search Quality (mentions future "ADR-013" for hybrid search)
+
+**This ADR supersedes**:
+- ADR-011's placeholder for "ADR-013: Hybrid Search"
+
+**This ADR implements**:
+- ADR-003's hybrid search design (previously unimplemented)
+- ADR-001's keyword search design (previously unimplemented)
+
+## References
+
+- **Reciprocal Rank Fusion**: Cormack, G. V., Clarke, C. L. A., & Buettcher, S. (2009). "Reciprocal rank fusion outperforms Condorcet and individual rank learning methods." SIGIR '09.
+- **Vector Search**: Malkov, Y. A., & Yashunin, D. A. (2018). "Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs." TPAMI.
+- **Hybrid Search Best Practices**: Qdrant documentation on hybrid search patterns
+- **MCP Protocol**: Model Context Protocol specification for tool design
+
+## Implementation Notes
+
+### Weight Validation
+
+```python
+def validate_weights(
+    semantic_weight: float,
+    keyword_weight: float,
+    fuzzy_weight: float
+) -> None:
+    """Validate hybrid search weights."""
+    if semantic_weight < 0 or keyword_weight < 0 or fuzzy_weight < 0:
+        raise ValueError("Weights must be non-negative")
+
+    total = semantic_weight + keyword_weight + fuzzy_weight
+    if total > 1.0:
+        raise ValueError(f"Weights sum to {total:.2f}, must be ≤1.0")
+
+    if total == 0.0:
+        raise ValueError("At least one weight must be > 0")
+```
+
+### Backward Compatibility
+
+The default behavior (`algorithm="hybrid"` with balanced weights) provides better results than current pure semantic search, while maintaining the same tool name and signature structure. Existing clients will automatically benefit from hybrid search without code changes.
+
+### Performance Considerations
+
+- **Semantic search**: ~50-200ms (vector DB query)
+- **Keyword search**: ~10-50ms (in-memory token matching)
+- **Fuzzy search**: ~20-100ms (character comparison)
+- **Hybrid search**: ~100-300ms (parallel execution + fusion)
+
+Parallel execution of algorithms minimizes hybrid search latency.
+
+### Security Model
+
+All algorithms respect the same security boundaries:
+1. **User filtering**: Qdrant queries filter by `user_id`
+2. **Access verification**: Results verified via Nextcloud API
+3. **OAuth scope**: `semantic:read` required for all algorithms
+4. **Viz pane**: Shows only current user's documents
+
+## Success Metrics
+
+1. **Adoption**: % of MCP clients using algorithm parameter
+2. **Performance**: Search latency percentiles (p50, p95, p99)
+3. **Quality**: User satisfaction with result relevance
+4. **Viz pane usage**: % of users accessing testing interface
+5. **Weight distribution**: Most common weight configurations
+
+## Future Enhancements
+
+1. **Additional algorithms**: BM25, TF-IDF, neural reranking
+2. **Auto-tuning**: Learn optimal weights per user
+3. **Query analysis**: Automatic algorithm selection based on query
+4. **Cross-app search**: Extend beyond notes to calendar, files, etc.
+5. **Feedback loop**: Use click-through rate to improve weights
@@ -1,5 +1,6 @@
 import logging
 import os
+import time
 from collections.abc import AsyncIterator
 from contextlib import AsyncExitStack, asynccontextmanager
 from dataclasses import dataclass
@@ -44,6 +45,10 @@ from nextcloud_mcp_server.observability import (
    setup_metrics,
    setup_tracing,
 )
+from nextcloud_mcp_server.observability.metrics import (
+    record_dependency_check,
+    set_dependency_health,
+)
 from nextcloud_mcp_server.server import (
    configure_calendar_tools,
    configure_contacts_tools,
@@ -418,6 +423,19 @@ async def app_lifespan_basic(server: FastMCP) -> AsyncIterator[AppContext]:
                "NEXTCLOUD_USERNAME is required for vector sync in BasicAuth mode"
            )

+        # Initialize Qdrant collection before starting background tasks
+        logger.info("Initializing Qdrant collection...")
+        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+        try:
+            await get_qdrant_client()  # Triggers collection creation if needed
+            logger.info("Qdrant collection ready")
+        except Exception as e:
+            logger.error(f"Failed to initialize Qdrant collection: {e}")
+            raise RuntimeError(
+                f"Cannot start vector sync - Qdrant initialization failed: {e}"
+            ) from e
+
        # Initialize shared state
        send_stream, receive_stream = anyio.create_memory_object_stream(
            max_buffer_size=settings.vector_sync_queue_max_size
@@ -489,9 +507,9 @@ async def setup_oauth_config():
    - External IdP mode: OIDC_DISCOVERY_URL points to external provider
      → External IdP for OAuth, Nextcloud user_oidc validates tokens and provides API access

-    Uses generic OIDC environment variables:
+    Uses OIDC environment variables:
    - OIDC_DISCOVERY_URL: OIDC discovery endpoint (optional, defaults to NEXTCLOUD_HOST)
-    - OIDC_CLIENT_ID / OIDC_CLIENT_SECRET: Static credentials (optional, uses DCR if not provided)
+    - NEXTCLOUD_OIDC_CLIENT_ID / NEXTCLOUD_OIDC_CLIENT_SECRET: Static credentials (optional, uses DCR if not provided)
    - NEXTCLOUD_OIDC_SCOPES: Requested OAuth scopes

    This is done synchronously before FastMCP initialization because FastMCP
@@ -615,19 +633,21 @@ async def setup_oauth_config():
            )

    # Load client credentials (static or dynamic registration)
-    client_id = os.getenv("OIDC_CLIENT_ID")
-    client_secret = os.getenv("OIDC_CLIENT_SECRET")
+    client_id = os.getenv("NEXTCLOUD_OIDC_CLIENT_ID")
+    client_secret = os.getenv("NEXTCLOUD_OIDC_CLIENT_SECRET")

    if client_id and client_secret:
        logger.info(f"Using static OIDC client credentials: {client_id}")
    elif registration_endpoint:
-        logger.info("OIDC_CLIENT_ID not set, attempting Dynamic Client Registration")
+        logger.info(
+            "NEXTCLOUD_OIDC_CLIENT_ID not set, attempting Dynamic Client Registration"
+        )
        client_id, client_secret = await load_oauth_client_credentials(
            nextcloud_host=nextcloud_host, registration_endpoint=registration_endpoint
        )
    else:
        raise ValueError(
-            "OIDC_CLIENT_ID and OIDC_CLIENT_SECRET environment variables are required "
+            "NEXTCLOUD_OIDC_CLIENT_ID and NEXTCLOUD_OIDC_CLIENT_SECRET environment variables are required "
            "when the OIDC provider does not support Dynamic Client Registration. "
            f"Discovery URL: {discovery_url}"
        )
@@ -1086,6 +1106,19 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
                # Create client since we're outside FastMCP lifespan
                client = NextcloudClient.from_env()

+                # Initialize Qdrant collection before starting background tasks
+                logger.info("Initializing Qdrant collection...")
+                from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+                try:
+                    await get_qdrant_client()  # Triggers collection creation if needed
+                    logger.info("Qdrant collection ready")
+                except Exception as e:
+                    logger.error(f"Failed to initialize Qdrant collection: {e}")
+                    raise RuntimeError(
+                        f"Cannot start vector sync - Qdrant initialization failed: {e}"
+                    ) from e
+
                # Initialize shared state
                send_stream, receive_stream = anyio_module.create_memory_object_stream(
                    max_buffer_size=settings.vector_sync_queue_max_size
@@ -1179,12 +1212,35 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
        checks = {}
        is_ready = True

-        # Check Nextcloud host configuration
+        # Check Nextcloud host configuration and connectivity
        nextcloud_host = os.getenv("NEXTCLOUD_HOST")
        if nextcloud_host:
            checks["nextcloud_configured"] = "ok"
+            # Try to connect to Nextcloud
+            start_time = time.time()
+            try:
+                async with httpx.AsyncClient(timeout=2.0) as client:
+                    response = await client.get(f"{nextcloud_host}/status.php")
+                    duration = time.time() - start_time
+                    if response.status_code == 200:
+                        checks["nextcloud_reachable"] = "ok"
+                        set_dependency_health("nextcloud", True)
+                    else:
+                        checks["nextcloud_reachable"] = (
+                            f"error: status {response.status_code}"
+                        )
+                        set_dependency_health("nextcloud", False)
+                        is_ready = False
+                    record_dependency_check("nextcloud", duration)
+            except Exception as e:
+                duration = time.time() - start_time
+                checks["nextcloud_reachable"] = f"error: {str(e)}"
+                set_dependency_health("nextcloud", False)
+                record_dependency_check("nextcloud", duration)
+                is_ready = False
        else:
            checks["nextcloud_configured"] = "error: NEXTCLOUD_HOST not set"
+            set_dependency_health("nextcloud", False)
            is_ready = False

        # Check authentication configuration
@@ -1212,20 +1268,29 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
        qdrant_url = os.getenv("QDRANT_URL")  # Only set in network mode

        if vector_sync_enabled and qdrant_url:
+            start_time = time.time()
            try:
                async with httpx.AsyncClient(timeout=2.0) as client:
                    response = await client.get(f"{qdrant_url}/readyz")
+                    duration = time.time() - start_time
                    if response.status_code == 200:
                        checks["qdrant"] = "ok"
+                        set_dependency_health("qdrant", True)
                    else:
                        checks["qdrant"] = f"error: status {response.status_code}"
+                        set_dependency_health("qdrant", False)
                        is_ready = False
+                    record_dependency_check("qdrant", duration)
            except Exception as e:
+                duration = time.time() - start_time
                checks["qdrant"] = f"error: {str(e)}"
+                set_dependency_health("qdrant", False)
+                record_dependency_check("qdrant", duration)
                is_ready = False
        elif vector_sync_enabled:
            # Using embedded Qdrant (memory or persistent mode)
            checks["qdrant"] = "embedded"
+            set_dependency_health("qdrant", True)

        status_code = 200 if is_ready else 503
        return JSONResponse(
@@ -1412,6 +1477,10 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
        user_info_html,
        vector_sync_status_fragment,
    )
+    from nextcloud_mcp_server.auth.viz_routes import (
+        vector_visualization_html,
+        vector_visualization_search,
+    )
    from nextcloud_mcp_server.auth.webhook_routes import (
        disable_webhook_preset,
        enable_webhook_preset,
@@ -1431,6 +1500,15 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
            vector_sync_status_fragment,
            methods=["GET"],
        ),  # /app/vector-sync/status
+        # Vector visualization routes
+        Route(
+            "/vector-viz", vector_visualization_html, methods=["GET"]
+        ),  # /app/vector-viz
+        Route(
+            "/vector-viz/search",
+            vector_visualization_search,
+            methods=["GET"],
+        ),  # /app/vector-viz/search
        # Webhook management routes (admin-only)
        Route("/webhooks", webhook_management_pane, methods=["GET"]),  # /app/webhooks
        Route(
@@ -12,6 +12,10 @@ from mcp.server.fastmcp import Context

 from ..client import NextcloudClient
 from ..config import get_settings
+from ..observability.metrics import (
+    oauth_token_cache_hits_total,
+    oauth_token_exchange_total,
+)
 from .token_exchange import exchange_token_for_audience

 logger = logging.getLogger(__name__)
@@ -138,6 +142,7 @@ async def get_session_client_from_context(
                logger.debug(
                    f"Using cached exchanged token (expires in {expiry - time.time():.1f}s)"
                )
+                oauth_token_cache_hits_total.labels(hit="true").inc()
                return NextcloudClient.from_token(
                    base_url=base_url, token=cached_token, username=username
                )
@@ -145,17 +150,24 @@ async def get_session_client_from_context(
                logger.debug("Cached token expired, removing from cache")
                del _exchange_cache[cache_key]

+        oauth_token_cache_hits_total.labels(hit="false").inc()
+
        # Perform RFC 8693 token exchange
        logger.info(f"Exchanging MCP token for Nextcloud API token (user: {username})")

-        # Exchange for Nextcloud resource URI audience
-        exchanged_token, expires_in = await exchange_token_for_audience(
-            subject_token=mcp_token,
-            requested_audience=settings.nextcloud_resource_uri or "nextcloud",
-            requested_scopes=None,  # Nextcloud doesn't support scopes
-        )
+        try:
+            # Exchange for Nextcloud resource URI audience
+            exchanged_token, expires_in = await exchange_token_for_audience(
+                subject_token=mcp_token,
+                requested_audience=settings.nextcloud_resource_uri or "nextcloud",
+                requested_scopes=None,  # Nextcloud doesn't support scopes
+            )
+            oauth_token_exchange_total.labels(status="success").inc()

-        logger.info(f"Token exchange successful. Token expires in {expires_in}s")
+            logger.info(f"Token exchange successful. Token expires in {expires_in}s")
+        except Exception:
+            oauth_token_exchange_total.labels(status="error").inc()
+            raise

        # Cache the exchanged token
        # Use the minimum of exchange TTL and configured cache TTL
@@ -35,6 +35,8 @@ from typing import Any, Optional
 import aiosqlite
 from cryptography.fernet import Fernet

+from nextcloud_mcp_server.observability.metrics import record_db_operation
+
 logger = logging.getLogger(__name__)


@@ -292,35 +294,43 @@ class RefreshTokenStorage:
        # For Flow 2, set provisioned_at timestamp
        provisioned_at = now if flow_type == "flow2" else None

-        async with aiosqlite.connect(self.db_path) as db:
-            await db.execute(
-                """
-                INSERT OR REPLACE INTO refresh_tokens
-                (user_id, encrypted_token, expires_at, created_at, updated_at,
-                 flow_type, token_audience, provisioned_at, provisioning_client_id, scopes)
-                VALUES (?, ?, ?, COALESCE((SELECT created_at FROM refresh_tokens WHERE user_id = ?), ?), ?,
-                        ?, ?, ?, ?, ?)
-                """,
-                (
-                    user_id,
-                    encrypted_token,
-                    expires_at,
-                    user_id,
-                    now,
-                    now,
-                    flow_type,
-                    token_audience,
-                    provisioned_at,
-                    provisioning_client_id,
-                    scopes_json,
-                ),
-            )
-            await db.commit()
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                await db.execute(
+                    """
+                    INSERT OR REPLACE INTO refresh_tokens
+                    (user_id, encrypted_token, expires_at, created_at, updated_at,
+                     flow_type, token_audience, provisioned_at, provisioning_client_id, scopes)
+                    VALUES (?, ?, ?, COALESCE((SELECT created_at FROM refresh_tokens WHERE user_id = ?), ?), ?,
+                            ?, ?, ?, ?, ?)
+                    """,
+                    (
+                        user_id,
+                        encrypted_token,
+                        expires_at,
+                        user_id,
+                        now,
+                        now,
+                        flow_type,
+                        token_audience,
+                        provisioned_at,
+                        provisioning_client_id,
+                        scopes_json,
+                    ),
+                )
+                await db.commit()
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "insert", duration, "success")

-        logger.info(
-            f"Stored refresh token for user {user_id}"
-            + (f" (expires at {expires_at})" if expires_at else "")
-        )
+            logger.info(
+                f"Stored refresh token for user {user_id}"
+                + (f" (expires at {expires_at})" if expires_at else "")
+            )
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "insert", duration, "error")
+            raise

        # Audit log
        await self._audit_log(
@@ -422,40 +432,45 @@ class RefreshTokenStorage:
        if not self._initialized:
            await self.initialize()

-        async with aiosqlite.connect(self.db_path) as db:
-            async with db.execute(
-                """
-                SELECT encrypted_token, expires_at, flow_type, token_audience,
-                       provisioned_at, provisioning_client_id, scopes
-                FROM refresh_tokens WHERE user_id = ?
-                """,
-                (user_id,),
-            ) as cursor:
-                row = await cursor.fetchone()
-
-        if not row:
-            logger.debug(f"No refresh token found for user {user_id}")
-            return None
-
-        (
-            encrypted_token,
-            expires_at,
-            flow_type,
-            token_audience,
-            provisioned_at,
-            provisioning_client_id,
-            scopes_json,
-        ) = row
-
-        # Check expiration
-        if expires_at is not None and expires_at < time.time():
-            logger.warning(
-                f"Refresh token for user {user_id} has expired (expired at {expires_at})"
-            )
-            await self.delete_refresh_token(user_id)
-            return None
-
+        start_time = time.time()
        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                async with db.execute(
+                    """
+                    SELECT encrypted_token, expires_at, flow_type, token_audience,
+                           provisioned_at, provisioning_client_id, scopes
+                    FROM refresh_tokens WHERE user_id = ?
+                    """,
+                    (user_id,),
+                ) as cursor:
+                    row = await cursor.fetchone()
+
+            if not row:
+                logger.debug(f"No refresh token found for user {user_id}")
+                duration = time.time() - start_time
+                record_db_operation("sqlite", "select", duration, "success")
+                return None
+
+            (
+                encrypted_token,
+                expires_at,
+                flow_type,
+                token_audience,
+                provisioned_at,
+                provisioning_client_id,
+                scopes_json,
+            ) = row
+
+            # Check expiration
+            if expires_at is not None and expires_at < time.time():
+                logger.warning(
+                    f"Refresh token for user {user_id} has expired (expired at {expires_at})"
+                )
+                await self.delete_refresh_token(user_id)
+                duration = time.time() - start_time
+                record_db_operation("sqlite", "select", duration, "success")
+                return None
+
            decrypted_token = self.cipher.decrypt(encrypted_token).decode()
            scopes = json.loads(scopes_json) if scopes_json else None

@@ -463,6 +478,9 @@ class RefreshTokenStorage:
                f"Retrieved refresh token for user {user_id} (flow_type: {flow_type})"
            )

+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "success")
+
            return {
                "refresh_token": decrypted_token,
                "expires_at": expires_at,
@@ -474,6 +492,8 @@ class RefreshTokenStorage:
                "scopes": scopes,
            }
        except Exception as e:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "select", duration, "error")
            logger.error(f"Failed to decrypt refresh token for user {user_id}: {e}")
            return None

@@ -568,25 +588,34 @@ class RefreshTokenStorage:
        if not self._initialized:
            await self.initialize()

-        async with aiosqlite.connect(self.db_path) as db:
-            cursor = await db.execute(
-                "DELETE FROM refresh_tokens WHERE user_id = ?",
-                (user_id,),
-            )
-            await db.commit()
-            deleted = cursor.rowcount > 0
+        start_time = time.time()
+        try:
+            async with aiosqlite.connect(self.db_path) as db:
+                cursor = await db.execute(
+                    "DELETE FROM refresh_tokens WHERE user_id = ?",
+                    (user_id,),
+                )
+                await db.commit()
+                deleted = cursor.rowcount > 0

-        if deleted:
-            logger.info(f"Deleted refresh token for user {user_id}")
-            await self._audit_log(
-                event="delete_refresh_token",
-                user_id=user_id,
-                auth_method="offline_access",
-            )
-        else:
-            logger.debug(f"No refresh token to delete for user {user_id}")
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "success")

-        return deleted
+            if deleted:
+                logger.info(f"Deleted refresh token for user {user_id}")
+                await self._audit_log(
+                    event="delete_refresh_token",
+                    user_id=user_id,
+                    auth_method="offline_access",
+                )
+            else:
+                logger.debug(f"No refresh token to delete for user {user_id}")
+
+            return deleted
+        except Exception:
+            duration = time.time() - start_time
+            record_db_operation("sqlite", "delete", duration, "error")
+            raise

    async def get_all_user_ids(self) -> list[str]:
        """
@@ -26,6 +26,10 @@ from jwt import PyJWKClient
 from mcp.server.auth.provider import AccessToken, TokenVerifier

 from nextcloud_mcp_server.config import Settings
+from nextcloud_mcp_server.observability.metrics import (
+    oauth_token_cache_hits_total,
+    record_oauth_token_validation,
+)

 logger = logging.getLogger(__name__)

@@ -105,8 +109,11 @@ class UnifiedTokenVerifier(TokenVerifier):
        cached = self._get_cached_token(token)
        if cached:
            logger.debug("Token found in cache")
+            oauth_token_cache_hits_total.labels(hit="true").inc()
            return cached

+        oauth_token_cache_hits_total.labels(hit="false").inc()
+
        # Both modes do the same validation (MCP audience only)
        return await self._verify_mcp_audience(token)

@@ -124,13 +131,24 @@ class UnifiedTokenVerifier(TokenVerifier):
        Returns:
            AccessToken if valid with MCP audience, None otherwise
        """
+        validation_method = "unknown"
        try:
            # Attempt JWT verification first
            if self._is_jwt_format(token) and self.jwks_client:
+                validation_method = "jwt"
                payload = await self._verify_jwt_signature(token)
+                if payload:
+                    record_oauth_token_validation("jwt", "valid")
+                else:
+                    record_oauth_token_validation("jwt", "invalid")
            else:
                # Fall back to introspection for opaque tokens
+                validation_method = "introspect"
                payload = await self._introspect_token(token)
+                if payload:
+                    record_oauth_token_validation("introspect", "valid")
+                else:
+                    record_oauth_token_validation("introspect", "invalid")
                if not payload:
                    return None

@@ -146,6 +164,8 @@ class UnifiedTokenVerifier(TokenVerifier):
                    f"Got {audiences}, need MCP ({self.settings.oidc_client_id} or "
                    f"{self.settings.nextcloud_mcp_server_url})"
                )
+                # Record as invalid due to audience mismatch
+                record_oauth_token_validation(validation_method, "invalid")
                return None

            # Log based on mode for clarity
@@ -163,6 +183,7 @@ class UnifiedTokenVerifier(TokenVerifier):

        except Exception as e:
            logger.error(f"Token verification failed: {e}")
+            record_oauth_token_validation(validation_method, "error")
            return None

    def _has_mcp_audience(self, payload: dict[str, Any]) -> bool:
@@ -489,6 +489,16 @@ async def user_info_html(request: Request) -> HTMLResponse:
            str(request.url_for("oauth_logout")) if oauth_ctx else "/oauth/logout"
        )

+    # Get Nextcloud host for generating links to apps (used by viz tab)
+    # Use public issuer URL if available (for browser-accessible links),
+    # otherwise fall back to NEXTCLOUD_HOST from settings
+    from nextcloud_mcp_server.config import get_settings
+
+    settings = get_settings()
+    nextcloud_host_for_links = (
+        os.getenv("NEXTCLOUD_PUBLIC_ISSUER_URL") or settings.nextcloud_host
+    )
+
    # Build host info HTML (BasicAuth only)
    host_info_html = ""
    if auth_mode == "basic":
@@ -658,6 +668,115 @@ async def user_info_html(request: Request) -> HTMLResponse:
        <!-- Alpine.js for tab state management -->
        <script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>

+        <!-- Plotly.js for vector visualization -->
+        <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script>
+
+        <!-- Vector visualization app (Alpine.js component) -->
+        <script>
+            function vizApp() {{
+                return {{
+                    query: '',
+                    algorithm: 'hybrid',
+                    showAdvanced: false,
+                    docTypes: [''],  // Default to "All Types"
+                    limit: 50,
+                    scoreThreshold: 0.7,
+                    semanticWeight: 0.5,
+                    keywordWeight: 0.3,
+                    fuzzyWeight: 0.2,
+                    loading: false,
+                    results: [],
+
+                    async executeSearch() {{
+                        this.loading = true;
+                        this.results = [];
+
+                        try {{
+                            const params = new URLSearchParams({{
+                                query: this.query,
+                                algorithm: this.algorithm,
+                                limit: this.limit,
+                                score_threshold: this.scoreThreshold,
+                                semantic_weight: this.semanticWeight,
+                                keyword_weight: this.keywordWeight,
+                                fuzzy_weight: this.fuzzyWeight,
+                            }});
+
+                            // Add doc_types parameter (filter out empty string for "All Types")
+                            const selectedTypes = this.docTypes.filter(t => t !== '');
+                            if (selectedTypes.length > 0) {{
+                                params.append('doc_types', selectedTypes.join(','));
+                            }}
+
+                            const response = await fetch(`/app/vector-viz/search?${{params}}`);
+                            const data = await response.json();
+
+                            if (data.success) {{
+                                this.results = data.results;
+                                this.renderPlot(data.coordinates_2d, data.results);
+                            }} else {{
+                                alert('Search failed: ' + data.error);
+                            }}
+                        }} catch (error) {{
+                            alert('Error: ' + error.message);
+                        }} finally {{
+                            this.loading = false;
+                        }}
+                    }},
+
+                    renderPlot(coordinates, results) {{
+                        const trace = {{
+                            x: coordinates.map(c => c[0]),
+                            y: coordinates.map(c => c[1]),
+                            mode: 'markers',
+                            type: 'scatter',
+                            text: results.map(r => `${{r.title}}<br>Score: ${{r.score.toFixed(3)}}`),
+                            marker: {{
+                                size: 8,
+                                color: results.map(r => r.score),
+                                colorscale: 'Viridis',
+                                showscale: true,
+                                colorbar: {{ title: 'Score' }},
+                                cmin: 0,
+                                cmax: 1
+                            }}
+                        }};
+
+                        const layout = {{
+                            title: `Vector Space (PCA 2D) - ${{results.length}} results`,
+                            xaxis: {{ title: 'PC1' }},
+                            yaxis: {{ title: 'PC2' }},
+                            hovermode: 'closest',
+                            height: 600
+                        }};
+
+                        Plotly.newPlot('viz-plot', [trace], layout);
+                    }},
+
+                    getNextcloudUrl(result) {{
+                        // Generate Nextcloud URL based on document type
+                        // Use the actual Nextcloud host (port 8080), not the MCP server
+                        const baseUrl = '{nextcloud_host_for_links}';
+
+                        switch (result.doc_type) {{
+                            case 'note':
+                                return `${{baseUrl}}/apps/notes/note/${{result.id}}`;
+                            case 'file':
+                                return `${{baseUrl}}/apps/files/?fileId=${{result.id}}`;
+                            case 'calendar':
+                                return `${{baseUrl}}/apps/calendar`;
+                            case 'contact':
+                                return `${{baseUrl}}/apps/contacts`;
+                            case 'deck':
+                                return `${{baseUrl}}/apps/deck`;
+                            default:
+                                return `${{baseUrl}}`;
+                        }}
+                    }}
+                }}
+            }}
+        </script>
+
        <style>
            body {{
                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
@@ -846,6 +965,18 @@ async def user_info_html(request: Request) -> HTMLResponse:
                    Vector Sync
                </button>
                '''
+    }
+                {
+        ""
+        if not show_vector_sync_tab
+        else '''
+                <button
+                    class="tab"
+                    :class="activeTab === 'vector-viz' ? 'active' : ''"
+                    @click="activeTab = 'vector-viz'">
+                    Vector Viz
+                </button>
+                '''
    }
                {
        ""
@@ -881,6 +1012,19 @@ async def user_info_html(request: Request) -> HTMLResponse:

                {
        ""
+        if not show_vector_sync_tab
+        else '''
+                <!-- Vector Viz Tab -->
+                <div class="tab-pane" x-show="activeTab === 'vector-viz'" x-transition.opacity.duration.150ms>
+                    <div hx-get="/app/vector-viz" hx-trigger="load" hx-swap="outerHTML">
+                        <p style="color: #999;">Loading vector visualization...</p>
+                    </div>
+                </div>
+                '''
+    }
+
+                {
+        ""
        if not show_webhooks_tab
        else f'''
                <!-- Webhooks Tab (admin-only, loaded dynamically) -->
@@ -0,0 +1,610 @@
+"""Vector visualization routes for testing search algorithms.
+
+Provides a web UI for users to test different search algorithms on their own
+indexed documents and visualize results in 2D space using PCA.
+
+All processing happens server-side following ADR-012:
+- Search execution via shared search/algorithms.py
+- PCA dimensionality reduction (768-dim → 2D)
+- Only 2D coordinates + metadata sent to client
+- Bandwidth-efficient (2 floats per doc vs 768)
+"""
+
+import logging
+
+import numpy as np
+from starlette.authentication import requires
+from starlette.requests import Request
+from starlette.responses import HTMLResponse, JSONResponse
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.search import (
+    FuzzySearchAlgorithm,
+    HybridSearchAlgorithm,
+    KeywordSearchAlgorithm,
+    SemanticSearchAlgorithm,
+)
+from nextcloud_mcp_server.vector.pca import PCA
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_html(request: Request) -> HTMLResponse:
+    """Vector visualization page with search controls and interactive plot.
+
+    Provides UI for testing search algorithms with real-time visualization.
+    Requires vector sync to be enabled; when it is disabled a short notice
+    is returned instead of the search interface.
+
+    The returned markup binds to an Alpine.js component named ``vizApp``
+    (``x-data="vizApp()"``), which is not defined in this function and must
+    be provided by the page that embeds this markup.
+
+    Args:
+        request: Starlette request object
+
+    Returns:
+        HTML response with the search interface, or a notice that vector
+        sync is disabled
+    """
+    # Local import: only needed to escape the user name rendered below.
+    import html
+
+    settings = get_settings()
+
+    if not settings.vector_sync_enabled:
+        return HTMLResponse(
+            """
+            <div>
+                <h2>Vector Visualization</h2>
+                <div style="padding: 20px; background: #fff3cd; border: 1px solid #ffc107; border-radius: 4px;">
+                    Vector sync is not enabled. Set VECTOR_SYNC_ENABLED=true to use this feature.
+                </div>
+            </div>
+            """
+        )
+
+    # Get user info from auth context.
+    # The name is interpolated into the HTML below, so it is escaped to
+    # prevent markup/script injection through a crafted display name.
+    username = html.escape(
+        str(request.user.display_name)
+        if hasattr(request.user, "display_name")
+        else "unknown"
+    )
+
+    html_content = f"""
+        <style>
+            .viz-card {{
+                background: white;
+                border-radius: 8px;
+                padding: 20px;
+                margin-bottom: 20px;
+                box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+            }}
+            .viz-controls {{
+                margin-bottom: 20px;
+            }}
+            .viz-control-row {{
+                display: grid;
+                grid-template-columns: 2fr 1fr auto;
+                gap: 12px;
+                margin-bottom: 12px;
+                align-items: end;
+            }}
+            .viz-control-group {{
+                margin-bottom: 15px;
+            }}
+            .viz-control-group label {{
+                display: block;
+                margin-bottom: 5px;
+                font-weight: 500;
+                color: #333;
+            }}
+            .viz-control-group input[type="text"],
+            .viz-control-group input[type="number"],
+            .viz-control-group select {{
+                width: 100%;
+                padding: 8px 12px;
+                border: 1px solid #ddd;
+                border-radius: 4px;
+                font-size: 14px;
+            }}
+            .viz-control-group input[type="range"] {{
+                width: 100%;
+            }}
+            .viz-control-group select[multiple] {{
+                min-height: 100px;
+            }}
+            .viz-weight-display {{
+                display: inline-block;
+                min-width: 40px;
+                text-align: right;
+                color: #666;
+            }}
+            .viz-btn {{
+                background: #0066cc;
+                color: white;
+                border: none;
+                padding: 10px 20px;
+                border-radius: 4px;
+                cursor: pointer;
+                font-size: 14px;
+                font-weight: 500;
+            }}
+            .viz-btn:hover {{
+                background: #0052a3;
+            }}
+            .viz-btn-secondary {{
+                background: #6c757d;
+                color: white;
+                border: none;
+                padding: 6px 12px;
+                border-radius: 4px;
+                cursor: pointer;
+                font-size: 13px;
+                margin-bottom: 12px;
+            }}
+            .viz-btn-secondary:hover {{
+                background: #5a6268;
+            }}
+            #viz-plot-container {{
+                width: 100%;
+                height: 600px;
+                position: relative;
+            }}
+            #viz-plot {{
+                width: 100%;
+                height: 100%;
+            }}
+            .viz-loading {{
+                text-align: center;
+                padding: 40px;
+                color: #666;
+            }}
+            .viz-loading-overlay {{
+                position: absolute;
+                inset: 0;
+                display: flex;
+                align-items: center;
+                justify-content: center;
+                background: white;
+                color: #666;
+            }}
+            .viz-no-results {{
+                text-align: center;
+                padding: 40px;
+                color: #666;
+                font-style: italic;
+            }}
+            .viz-advanced-section {{
+                margin-top: 16px;
+                padding: 16px;
+                background: #f8f9fa;
+                border-radius: 4px;
+                border: 1px solid #dee2e6;
+            }}
+            .viz-advanced-grid {{
+                display: grid;
+                grid-template-columns: 1fr 1fr;
+                gap: 20px;
+            }}
+            .viz-info-box {{
+                background: #e3f2fd;
+                border-left: 4px solid #2196f3;
+                padding: 12px;
+                margin-bottom: 20px;
+                font-size: 14px;
+            }}
+        </style>
+
+        <div x-data="vizApp()">
+            <div class="viz-card">
+                <h2>Vector Visualization</h2>
+                <div class="viz-info-box">
+                    Testing search algorithms on your indexed documents. User: <strong>{username}</strong>
+                </div>
+
+                <form @submit.prevent="executeSearch">
+                    <div class="viz-controls">
+                        <!-- Main Controls -->
+                        <div class="viz-control-group">
+                            <label>Search Query</label>
+                            <input type="text" x-model="query" placeholder="Enter search query..." required />
+                        </div>
+
+                        <div class="viz-control-row">
+                            <div class="viz-control-group" style="margin-bottom: 0;">
+                                <label>Algorithm</label>
+                                <select x-model="algorithm">
+                                    <option value="semantic">Semantic (Vector Similarity)</option>
+                                    <option value="keyword">Keyword (Token Matching)</option>
+                                    <option value="fuzzy">Fuzzy (Character Overlap)</option>
+                                    <option value="hybrid" selected>Hybrid (RRF Fusion)</option>
+                                </select>
+                            </div>
+
+                            <div style="display: flex; align-items: flex-end;">
+                                <button type="submit" class="viz-btn" style="width: 100%;">Search & Visualize</button>
+                            </div>
+
+                            <div style="display: flex; align-items: flex-end;">
+                                <button type="button" class="viz-btn-secondary" @click="showAdvanced = !showAdvanced" style="white-space: nowrap;">
+                                    <span x-text="showAdvanced ? 'Hide Advanced' : 'Advanced'"></span>
+                                </button>
+                            </div>
+                        </div>
+
+                        <!-- Advanced Options (Collapsible) -->
+                        <div class="viz-advanced-section" x-show="showAdvanced" x-transition.opacity.duration.200ms>
+                            <h3 style="margin-top: 0; margin-bottom: 16px; font-size: 16px;">Advanced Options</h3>
+
+                            <div class="viz-advanced-grid">
+                                <div class="viz-control-group">
+                                    <label>Document Types</label>
+                                    <select x-model="docTypes" multiple>
+                                        <option value="">All Types (cross-app search)</option>
+                                        <option value="note">Notes</option>
+                                        <option value="file">Files</option>
+                                        <option value="calendar">Calendar Events</option>
+                                        <option value="contact">Contacts</option>
+                                        <option value="deck">Deck Cards</option>
+                                    </select>
+                                    <small style="color: #666; display: block; margin-top: 4px;">
+                                        Hold Ctrl/Cmd to select multiple
+                                    </small>
+                                </div>
+
+                                <div>
+                                    <div class="viz-control-group">
+                                        <label>Score Threshold (Semantic/Hybrid)</label>
+                                        <input type="number" x-model.number="scoreThreshold" min="0" max="1" step="0.1" />
+                                    </div>
+
+                                    <div class="viz-control-group">
+                                        <label>Result Limit</label>
+                                        <input type="number" x-model.number="limit" min="1" max="100" />
+                                    </div>
+                                </div>
+                            </div>
+
+                            <!-- Hybrid Weights (only when hybrid selected) -->
+                            <div x-show="algorithm === 'hybrid'" style="margin-top: 16px; padding: 12px; background: #e9ecef; border-radius: 4px;">
+                                <label style="margin-bottom: 12px; display: block;">Hybrid Algorithm Weights</label>
+
+                                <div style="margin-bottom: 8px;">
+                                    <label style="display: inline-block; width: 100px; font-weight: normal;">Semantic:</label>
+                                    <input type="range" x-model.number="semanticWeight" min="0" max="1" step="0.1" style="width: 200px; display: inline-block;">
+                                    <span class="viz-weight-display" x-text="semanticWeight.toFixed(1)"></span>
+                                </div>
+                                <div style="margin-bottom: 8px;">
+                                    <label style="display: inline-block; width: 100px; font-weight: normal;">Keyword:</label>
+                                    <input type="range" x-model.number="keywordWeight" min="0" max="1" step="0.1" style="width: 200px; display: inline-block;">
+                                    <span class="viz-weight-display" x-text="keywordWeight.toFixed(1)"></span>
+                                </div>
+                                <div>
+                                    <label style="display: inline-block; width: 100px; font-weight: normal;">Fuzzy:</label>
+                                    <input type="range" x-model.number="fuzzyWeight" min="0" max="1" step="0.1" style="width: 200px; display: inline-block;">
+                                    <span class="viz-weight-display" x-text="fuzzyWeight.toFixed(1)"></span>
+                                </div>
+                            </div>
+                        </div>
+                    </div>
+                </form>
+            </div>
+
+            <div class="viz-card">
+                <div id="viz-plot-container">
+                    <div x-show="loading" class="viz-loading-overlay" x-transition.opacity.duration.200ms>
+                        Executing search and computing PCA projection...
+                    </div>
+                    <div id="viz-plot" x-show="!loading" x-transition.opacity.duration.200ms></div>
+                </div>
+            </div>
+
+            <div class="viz-card">
+                <h3>Search Results (<span x-text="loading ? '...' : results.length"></span>)</h3>
+
+                <div x-show="loading" class="viz-loading" x-transition.opacity.duration.200ms>
+                    Loading results...
+                </div>
+
+                <div x-show="!loading && results.length === 0" class="viz-no-results" x-transition.opacity.duration.200ms>
+                    No results found. Try a different query or adjust your search parameters.
+                </div>
+
+                <template x-if="!loading && results.length > 0">
+                    <div x-transition.opacity.duration.200ms>
+                        <template x-for="result in results" :key="result.id">
+                            <div style="padding: 12px; border-bottom: 1px solid #eee;">
+                                <a :href="getNextcloudUrl(result)" target="_blank" style="font-weight: 500; color: #0066cc; text-decoration: none;">
+                                    <span x-text="result.title"></span>
+                                </a>
+                                <div style="font-size: 14px; color: #666; margin-top: 4px;" x-text="result.excerpt"></div>
+                                <div style="font-size: 12px; color: #999; margin-top: 4px;">
+                                    Score: <span x-text="result.score.toFixed(3)"></span> |
+                                    Type: <span x-text="result.doc_type"></span>
+                                </div>
+                            </div>
+                        </template>
+                    </div>
+                </template>
+            </div>
+        </div>
+    """
+
+    return HTMLResponse(content=html_content)
+
+
+@requires("authenticated", redirect="oauth_login")
+async def vector_visualization_search(request: Request) -> JSONResponse:
+    """Execute server-side search and return 2D coordinates + results.
+
+    All processing happens server-side:
+    1. Execute search via shared algorithm module
+    2. Fetch matching vectors from Qdrant
+    3. Apply PCA reduction (768-dim → 2D)
+    4. Return coordinates + metadata only
+
+    Args:
+        request: Starlette request with query parameters
+
+    Returns:
+        JSON response with coordinates_2d and results
+    """
+    settings = get_settings()
+
+    if not settings.vector_sync_enabled:
+        return JSONResponse(
+            {"success": False, "error": "Vector sync not enabled"},
+            status_code=400,
+        )
+
+    # Get user info from auth context
+    username = (
+        request.user.display_name if hasattr(request.user, "display_name") else None
+    )
+
+    if not username:
+        return JSONResponse(
+            {"success": False, "error": "User not authenticated"},
+            status_code=401,
+        )
+
+    # Parse query parameters
+    query = request.query_params.get("query", "")
+    algorithm = request.query_params.get("algorithm", "hybrid")
+    limit = int(request.query_params.get("limit", "50"))
+    score_threshold = float(request.query_params.get("score_threshold", "0.7"))
+    semantic_weight = float(request.query_params.get("semantic_weight", "0.5"))
+    keyword_weight = float(request.query_params.get("keyword_weight", "0.3"))
+    fuzzy_weight = float(request.query_params.get("fuzzy_weight", "0.2"))
+
+    # Parse doc_types (comma-separated list, None = all types)
+    doc_types_param = request.query_params.get("doc_types", "")
+    doc_types = doc_types_param.split(",") if doc_types_param else None
+
+    logger.info(
+        f"Viz search: user={username}, query='{query}', "
+        f"algorithm={algorithm}, limit={limit}, doc_types={doc_types}"
+    )
+
+    try:
+        # Get authenticated HTTP client from session
+        # In BasicAuth mode: uses username/password from session
+        # In OAuth mode: uses access token from session
+        from nextcloud_mcp_server.auth.userinfo_routes import (
+            _get_authenticated_client_for_userinfo,
+        )
+        from nextcloud_mcp_server.client.notes import NotesClient
+
+        async with await _get_authenticated_client_for_userinfo(request) as http_client:
+            # Create NotesClient directly with authenticated HTTP client
+            notes_client = NotesClient(http_client, username)
+
+            # Wrap in a minimal client object for search algorithms
+            # This conforms to NextcloudClientProtocol but only implements notes
+            class MinimalNextcloudClient:
+                """Stand-in client that exposes only the Notes API.
+
+                ``notes`` returns the client passed to the constructor; every
+                other app accessor (``webdav``, ``calendar``, ``contacts``,
+                ``deck``, ``cookbook``, ``tables``) is present but always
+                returns None.
+                """
+
+                def __init__(self, notes_client, username):
+                    # Keep the notes client private; it is exposed read-only
+                    # through the `notes` property below.
+                    self._notes = notes_client
+                    self.username = username
+
+                @property
+                def notes(self):
+                    return self._notes
+
+                # The remaining accessors are placeholders: no client is
+                # created for these apps here, so each one yields None.
+                @property
+                def webdav(self):
+                    return None
+
+                @property
+                def calendar(self):
+                    return None
+
+                @property
+                def contacts(self):
+                    return None
+
+                @property
+                def deck(self):
+                    return None
+
+                @property
+                def cookbook(self):
+                    return None
+
+                @property
+                def tables(self):
+                    return None
+
+            nextcloud_client = MinimalNextcloudClient(notes_client, username)
+
+            # Create search algorithm
+            if algorithm == "semantic":
+                search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+            elif algorithm == "keyword":
+                search_algo = KeywordSearchAlgorithm()
+            elif algorithm == "fuzzy":
+                search_algo = FuzzySearchAlgorithm()
+            elif algorithm == "hybrid":
+                search_algo = HybridSearchAlgorithm(
+                    semantic_weight=semantic_weight,
+                    keyword_weight=keyword_weight,
+                    fuzzy_weight=fuzzy_weight,
+                )
+            else:
+                return JSONResponse(
+                    {"success": False, "error": f"Unknown algorithm: {algorithm}"},
+                    status_code=400,
+                )
+
+            # Execute search (supports cross-app when doc_types=None)
+            # Get unverified results with buffer for filtering
+            all_results = []
+            if doc_types is None or len(doc_types) == 0:
+                # Cross-app search - search all indexed types
+                unverified_results = await search_algo.search(
+                    query=query,
+                    user_id=username,
+                    limit=limit * 2,  # Buffer for verification filtering
+                    doc_type=None,  # Search all types
+                    score_threshold=score_threshold,
+                )
+                all_results.extend(unverified_results)
+            else:
+                # Search each document type and combine
+                for doc_type in doc_types:
+                    unverified_results = await search_algo.search(
+                        query=query,
+                        user_id=username,
+                        limit=limit * 2,  # Buffer for verification filtering
+                        doc_type=doc_type,
+                        score_threshold=score_threshold,
+                    )
+                    all_results.extend(unverified_results)
+                # Sort by score before verification
+                all_results.sort(key=lambda r: r.score, reverse=True)
+
+            # Verify access for all results (deduplicates and filters)
+            from nextcloud_mcp_server.search.verification import verify_search_results
+
+            verified_results = await verify_search_results(
+                all_results, nextcloud_client
+            )
+            search_results = verified_results[:limit]
+
+        if not search_results:
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [],
+                    "coordinates_2d": [],
+                    "message": "No results found",
+                }
+            )
+
+        # Fetch vectors for matching results from Qdrant
+        qdrant_client = await get_qdrant_client()
+        doc_ids = [r.id for r in search_results]
+
+        # Retrieve vectors for the matching documents
+        from qdrant_client.models import FieldCondition, Filter, MatchAny
+
+        points_response = await qdrant_client.scroll(
+            collection_name=settings.get_collection_name(),
+            scroll_filter=Filter(
+                must=[
+                    FieldCondition(
+                        key="doc_id",
+                        match=MatchAny(any=[str(doc_id) for doc_id in doc_ids]),
+                    ),
+                    FieldCondition(
+                        key="user_id",
+                        match={"value": username},
+                    ),
+                ]
+            ),
+            limit=len(doc_ids) * 2,  # Account for multiple chunks per doc
+            with_vectors=True,
+            with_payload=["doc_id"],  # Need doc_id to map vectors to results
+        )
+
+        points = points_response[0]
+
+        if not points:
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [],
+                    "coordinates_2d": [],
+                    "message": "No vectors found for results",
+                }
+            )
+
+        # Extract vectors
+        vectors = np.array([p.vector for p in points if p.vector is not None])
+
+        if len(vectors) < 2:
+            # Not enough points for PCA
+            return JSONResponse(
+                {
+                    "success": True,
+                    "results": [
+                        {
+                            "id": r.id,
+                            "doc_type": r.doc_type,
+                            "title": r.title,
+                            "excerpt": r.excerpt,
+                            "score": r.score,
+                        }
+                        for r in search_results
+                    ],
+                    "coordinates_2d": [[0, 0]] * len(search_results),
+                    "message": "Not enough vectors for PCA",
+                }
+            )
+
+        # Apply PCA dimensionality reduction (768-dim → 2D)
+        pca = PCA(n_components=2)
+        coords_2d = pca.fit_transform(vectors)
+
+        # After fit, these attributes are guaranteed to be set
+        assert pca.explained_variance_ratio_ is not None
+
+        logger.info(
+            f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, "
+            f"PC2={pca.explained_variance_ratio_[1]:.3f}"
+        )
+
+        # Map results to coordinates (use first chunk per document)
+        result_coords = []
+        seen_doc_ids = set()
+
+        for point, coord in zip(points, coords_2d):
+            if point.payload:
+                doc_id = int(point.payload.get("doc_id", 0))
+                if doc_id not in seen_doc_ids and doc_id in doc_ids:
+                    seen_doc_ids.add(doc_id)
+                    result_coords.append(coord.tolist())
+
+        # Build response
+        response_results = [
+            {
+                "id": r.id,
+                "doc_type": r.doc_type,
+                "title": r.title,
+                "excerpt": r.excerpt,
+                "score": r.score,
+            }
+            for r in search_results
+        ]
+
+        return JSONResponse(
+            {
+                "success": True,
+                "results": response_results,
+                "coordinates_2d": result_coords[: len(search_results)],
+                "pca_variance": {
+                    "pc1": float(pca.explained_variance_ratio_[0]),
+                    "pc2": float(pca.explained_variance_ratio_[1]),
+                },
+            }
+        )
+
+    except Exception as e:
+        logger.error(f"Viz search error: {e}", exc_info=True)
+        return JSONResponse(
+            {"success": False, "error": str(e)},
+            status_code=500,
+        )
@@ -288,8 +288,8 @@ def get_settings() -> Settings:
    return Settings(
        # OAuth/OIDC settings
        oidc_discovery_url=os.getenv("OIDC_DISCOVERY_URL"),
-        oidc_client_id=os.getenv("OIDC_CLIENT_ID"),
-        oidc_client_secret=os.getenv("OIDC_CLIENT_SECRET"),
+        oidc_client_id=os.getenv("NEXTCLOUD_OIDC_CLIENT_ID"),
+        oidc_client_secret=os.getenv("NEXTCLOUD_OIDC_CLIENT_SECRET"),
        oidc_issuer=os.getenv("OIDC_ISSUER"),
        # Nextcloud settings
        nextcloud_host=os.getenv("NEXTCLOUD_HOST"),
@@ -12,13 +12,24 @@ class NotesSearchController:
        """
        Search notes using token-based matching with relevance ranking.
        Returns notes sorted by relevance score.
+        If query is empty, returns all notes.
        """
        search_results = []
        query_tokens = self._process_query(query)

-        # If empty query after processing, return empty results
+        # If empty query after processing, return all notes
        if not query_tokens:
-            return []
+            async for note in notes:
+                search_results.append(
+                    {
+                        "id": note.get("id"),
+                        "title": note.get("title"),
+                        "category": note.get("category"),
+                        "modified": note.get("modified"),
+                        "_score": None,  # No score for unfiltered results
+                    }
+                )
+            return search_results

        # Process and score each note
        async for note in notes:
@@ -17,6 +17,7 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
        base_url: str,
        model: str = "nomic-embed-text",
        verify_ssl: bool = True,
+        timeout=httpx.Timeout(timeout=120, connect=5),
    ):
        """
        Initialize Ollama embedding provider.
@@ -29,8 +30,8 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.verify_ssl = verify_ssl
-        self.client = httpx.AsyncClient(verify=verify_ssl, timeout=30.0)
-        self._dimension = 768  # nomic-embed-text default
+        self.client = httpx.AsyncClient(verify=verify_ssl, timeout=timeout)
+        self._dimension: int | None = None  # Will be detected dynamically
        logger.info(
            f"Initialized Ollama provider: {base_url} (model={model}, verify_ssl={verify_ssl})"
        )
@@ -73,13 +74,36 @@ class OllamaEmbeddingProvider(EmbeddingProvider):
            embeddings.append(embedding)
        return embeddings

+    async def _detect_dimension(self):
+        """
+        Populate ``self._dimension`` by embedding a probe string.
+
+        Does nothing when the dimension is already known. Otherwise the
+        literal "test" is embedded via ``self.embed`` and the length of the
+        returned vector is stored as the dimension.
+        """
+        if self._dimension is not None:
+            return
+
+        logger.debug(f"Detecting embedding dimension for model {self.model}...")
+        probe_vector = await self.embed("test")
+        self._dimension = len(probe_vector)
+        logger.info(
+            f"Detected embedding dimension: {self._dimension} for model {self.model}"
+        )
+
    def get_dimension(self) -> int:
        """
        Get embedding dimension.

        Returns:
-            Vector dimension (768 for nomic-embed-text)
+            Vector dimension for the configured model
+
+        Raises:
+            RuntimeError: If dimension not detected yet (call _detect_dimension first)
        """
+        if self._dimension is None:
+            raise RuntimeError(
+                f"Embedding dimension not detected yet for model {self.model}. "
+                "Call _detect_dimension() first or generate an embedding."
+            )
        return self._dimension

    def _check_model_is_loaded(self, autoload: bool = True):
@@ -352,3 +352,92 @@ def record_dependency_check(dependency: str, duration: float) -> None:
        duration: Check duration in seconds
    """
    dependency_check_duration_seconds.labels(dependency=dependency).observe(duration)
+
+
+def record_vector_sync_scan(documents_found: int) -> None:
+    """
+    Record documents scanned during vector sync.
+
+    Adds ``documents_found`` to ``vector_sync_documents_scanned_total`` in a
+    single ``inc`` call, so one invocation accounts for a whole scan.
+
+    Args:
+        documents_found: Number of documents discovered in scan
+    """
+    vector_sync_documents_scanned_total.inc(documents_found)
+
+
+def record_vector_sync_processing(duration: float, status: str = "success") -> None:
+    """
+    Count one processed document and record how long processing took.
+
+    Args:
+        duration: Processing duration in seconds
+        status: "success" or "error"; used as the counter's ``status`` label
+    """
+    processed_counter = vector_sync_documents_processed_total.labels(status=status)
+    processed_counter.inc()
+    vector_sync_processing_duration_seconds.observe(duration)
+
+
+def record_qdrant_operation(operation: str, status: str = "success") -> None:
+    """
+    Count a single Qdrant vector database operation.
+
+    Args:
+        operation: Operation type ("upsert", "search", "delete")
+        status: "success" or "error"
+    """
+    label_values = {"operation": operation, "status": status}
+    qdrant_operations_total.labels(**label_values).inc()
+
+
+def update_vector_sync_queue_size(size: int) -> None:
+    """
+    Update vector sync queue size gauge.
+
+    The value is written with ``set``, so it replaces the previous reading
+    rather than being added to it.
+
+    Args:
+        size: Current queue size
+    """
+    vector_sync_queue_size.set(size)
+
+
+# =============================================================================
+# Decorator for Automatic Tool Instrumentation
+# =============================================================================
+
+
+def instrument_tool(func):
+    """
+    Decorator to automatically instrument MCP tool functions with metrics.
+
+    Wraps async tool functions to record execution time and success/error status.
+    Compatible with @mcp.tool() and @require_scopes() decorators.
+
+    Only ``Exception`` subclasses are recorded as errors; anything else
+    (e.g. ``asyncio.CancelledError``, which derives from ``BaseException``)
+    propagates without touching the metrics.
+
+    Usage:
+        @mcp.tool()
+        @require_scopes("notes:write")
+        @instrument_tool
+        async def nc_notes_create_note(...):
+            ...
+
+    Args:
+        func: The async function to instrument
+
+    Returns:
+        Wrapped function with metrics instrumentation
+    """
+    import functools
+    import time
+
+    @functools.wraps(func)
+    async def wrapper(*args, **kwargs):
+        tool_name = func.__name__
+        # perf_counter() is monotonic: the measured duration cannot go
+        # negative or jump when the system clock is adjusted mid-call.
+        start_time = time.perf_counter()
+        try:
+            result = await func(*args, **kwargs)
+        except Exception as e:
+            duration = time.perf_counter() - start_time
+            record_tool_call(tool_name, duration, "error")
+            record_tool_error(tool_name, type(e).__name__)
+            raise
+        else:
+            # Recorded outside the try body so a failure inside the metrics
+            # call is not additionally counted as a tool error.
+            duration = time.perf_counter() - start_time
+            record_tool_call(tool_name, duration, "success")
+            return result
+
+    return wrapper
@@ -66,8 +66,12 @@ class ObservabilityMiddleware(BaseHTTPMiddleware):
        # Record start time
        start_time = time.time()

-        # Skip tracing for health/metrics endpoints to reduce noise
-        should_trace = not (path.startswith("/health/") or path == "/metrics")
+        # Skip tracing for health/metrics/polling endpoints to reduce noise
+        should_trace = not (
+            path.startswith("/health/")
+            or path == "/metrics"
+            or path == "/app/vector-sync/status"
+        )

        try:
            if should_trace:
@@ -0,0 +1,33 @@
+"""Search algorithms module for unified multi-algorithm search.
+
+This module provides a unified interface for different search algorithms:
+- Semantic search (vector similarity)
+- Keyword search (token-based matching)
+- Fuzzy search (character overlap)
+- Hybrid search (RRF fusion of multiple algorithms)
+
+All algorithms share the same interface and can be used interchangeably by both
+MCP tools and the visualization pane.
+"""
+
+from nextcloud_mcp_server.search.algorithms import (
+    NextcloudClientProtocol,
+    SearchAlgorithm,
+    SearchResult,
+    get_indexed_doc_types,
+)
+from nextcloud_mcp_server.search.fuzzy import FuzzySearchAlgorithm
+from nextcloud_mcp_server.search.hybrid import HybridSearchAlgorithm
+from nextcloud_mcp_server.search.keyword import KeywordSearchAlgorithm
+from nextcloud_mcp_server.search.semantic import SemanticSearchAlgorithm
+
+__all__ = [
+    "NextcloudClientProtocol",
+    "SearchAlgorithm",
+    "SearchResult",
+    "get_indexed_doc_types",
+    "SemanticSearchAlgorithm",
+    "KeywordSearchAlgorithm",
+    "FuzzySearchAlgorithm",
+    "HybridSearchAlgorithm",
+]
@@ -0,0 +1,200 @@
+"""Base interfaces and data structures for search algorithms."""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Protocol, runtime_checkable
+
+
+@runtime_checkable
+class NextcloudClientProtocol(Protocol):
+    """Structural type for the Nextcloud client handed to search code.
+
+    A conforming client carries a ``username`` and exposes one accessor per
+    app-specific sub-client (Notes, WebDAV, Calendar, ...); those sub-clients
+    perform the actual API calls.
+
+    Document types (e.g., "note", "file", "calendar") are NOT 1:1 with apps.
+    The Notes app specializes in markdown files, while Files/WebDAV handles
+    multiple file types, so the abstraction sits at the document-type level.
+
+    Search algorithms query Qdrant to determine which document types are
+    actually indexed before attempting to access them, enabling graceful
+    cross-app search.
+    """
+
+    username: str
+
+    # Accessors for the per-app clients that search algorithms dispatch to.
+    # Each body is intentionally just a docstring: a Protocol only declares
+    # the member, implementations supply the value.
+    @property
+    def notes(self) -> Any:
+        """Notes client for accessing note documents."""
+
+    @property
+    def webdav(self) -> Any:
+        """WebDAV client for accessing file documents."""
+
+    @property
+    def calendar(self) -> Any:
+        """Calendar client for accessing event/task documents."""
+
+    @property
+    def contacts(self) -> Any:
+        """Contacts client for accessing contact card documents."""
+
+    @property
+    def deck(self) -> Any:
+        """Deck client for accessing deck card documents."""
+
+    @property
+    def cookbook(self) -> Any:
+        """Cookbook client for accessing recipe documents."""
+
+    @property
+    def tables(self) -> Any:
+        """Tables client for accessing table row documents."""
+
+
+async def get_indexed_doc_types(user_id: str) -> set[str]:
+    """Query Qdrant to get actually-indexed document types for a user.
+
+    This enables search algorithms to check which document types are available
+    before attempting to search/verify them, allowing graceful cross-app search.
+
+    Only the first 1000 points matching the user are inspected, so a type
+    that appears exclusively beyond that sample is not reported.
+
+    Args:
+        user_id: User ID to filter by
+
+    Returns:
+        Set of document type strings (e.g., {"note", "file", "calendar"}).
+        An empty set is returned when the Qdrant query fails; the failure is
+        logged as a warning rather than raised.
+
+    Example:
+        >>> types = await get_indexed_doc_types("alice")
+        >>> if "note" in types:
+        ...     # Search notes
+    """
+    import logging
+
+    from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+    from nextcloud_mcp_server.config import get_settings
+    from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+    logger = logging.getLogger(__name__)
+    settings = get_settings()
+
+    qdrant_client = await get_qdrant_client()
+    collection = settings.get_collection_name()
+
+    # Use scroll to sample documents and extract doc_types
+    # Note: This could be optimized with a facet/aggregation query if Qdrant adds support
+    try:
+        scroll_results, _next_offset = await qdrant_client.scroll(
+            collection_name=collection,
+            scroll_filter=Filter(
+                must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
+            ),
+            limit=1000,  # Sample size to discover types
+            with_payload=["doc_type"],
+            with_vectors=False,  # Don't need vectors for type discovery
+        )
+
+        doc_types: set[str] = set()
+        for point in scroll_results:
+            # A point may carry no payload at all; treat it as "no type"
+            # instead of letting one such point void the whole result via
+            # the broad except below.
+            doc_type = (point.payload or {}).get("doc_type")
+            if doc_type:
+                doc_types.add(doc_type)
+
+        logger.debug(f"Found indexed document types for user {user_id}: {doc_types}")
+        return doc_types
+
+    except Exception as e:
+        logger.warning(f"Failed to query Qdrant for doc_types: {e}")
+        return set()
+
+
+@dataclass
+class SearchResult:
+    """One ranked hit produced by a search algorithm.
+
+    Attributes:
+        id: Document ID
+        doc_type: Document type (note, file, calendar, contact, etc.)
+        title: Document title
+        excerpt: Content excerpt showing match context
+        score: Relevance score (0.0-1.0, higher is better)
+        metadata: Additional algorithm-specific metadata
+    """
+
+    id: int
+    doc_type: str
+    title: str
+    excerpt: str
+    score: float
+    metadata: dict[str, Any] | None = None
+
+    def __post_init__(self):
+        """Reject scores outside the closed interval [0.0, 1.0]."""
+        # Kept as a positive range test so NaN (which fails every
+        # comparison) is rejected as well.
+        score_in_range = 0.0 <= self.score <= 1.0
+        if score_in_range:
+            return
+        raise ValueError(f"Score must be between 0.0 and 1.0, got {self.score}")
+
+
+class SearchAlgorithm(ABC):
+    """Common contract shared by every search algorithm.
+
+    Subclasses provide ``search()`` and ``name``; because the signature is
+    uniform, callers can swap one algorithm for another.
+    """
+
+    @abstractmethod
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """Execute search with the given parameters.
+
+        Args:
+            query: Search query string
+            user_id: User ID for multi-tenant filtering
+            limit: Maximum number of results to return
+            doc_type: Optional document type filter (note, file, calendar, etc.)
+            **kwargs: Algorithm-specific parameters
+
+        Returns:
+            List of SearchResult objects ranked by relevance
+
+        Raises:
+            McpError: If search fails or configuration is invalid
+        """
+
+    @property
+    @abstractmethod
+    def name(self) -> str:
+        """Return algorithm name for identification."""
+
+    @property
+    def supports_scoring(self) -> bool:
+        """Whether this algorithm provides meaningful relevance scores.
+
+        Default: True. Override if algorithm doesn't support scoring.
+        """
+        return True
+
+    @property
+    def requires_vector_db(self) -> bool:
+        """Whether this algorithm requires vector database.
+
+        Default: False. Override for semantic search.
+        """
+        return False
@@ -0,0 +1,219 @@
+"""Fuzzy search algorithm using character overlap matching on Qdrant payload."""
+
+import logging
+from typing import Any
+
+from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+class FuzzySearchAlgorithm(SearchAlgorithm):
+    """Fuzzy search using simple character-based similarity.
+
+    Implements character overlap matching with configurable threshold:
+    - Compares character sets between query and text
+    - Requires configurable % character overlap to match (default: 70%)
+    - Tolerant to typos and minor variations
+    """
+
+    def __init__(self, threshold: float = 0.7):
+        """Initialize fuzzy search algorithm.
+
+        Args:
+            threshold: Minimum character overlap ratio (0-1, default: 0.7)
+
+        Raises:
+            ValueError: If threshold is outside [0.0, 1.0]
+        """
+        if not 0.0 <= threshold <= 1.0:
+            raise ValueError(f"Threshold must be between 0.0 and 1.0, got {threshold}")
+        self.threshold = threshold
+
+    @property
+    def name(self) -> str:
+        """Identifier of this algorithm ("fuzzy")."""
+        return "fuzzy"
+
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """Execute fuzzy search using character overlap on Qdrant payload.
+
+        Queries Qdrant for all indexed documents, then scores based on character
+        overlap in title and excerpt fields. Returns unverified results - access
+        verification should be performed separately at the final output stage.
+
+        Points without a payload or without a ``doc_id`` are ignored, and a
+        missing or null ``title``/``excerpt`` is treated as an empty string.
+
+        Args:
+            query: Search query
+            user_id: User ID for filtering
+            limit: Maximum results to return
+            doc_type: Optional document type filter (None = all types)
+            **kwargs: Additional parameters (threshold override)
+
+        Returns:
+            List of unverified SearchResult objects ranked by character overlap score
+        """
+        settings = get_settings()
+        threshold = kwargs.get("threshold", self.threshold)
+
+        logger.info(
+            f"Fuzzy search: query='{query}', user={user_id}, "
+            f"limit={limit}, threshold={threshold}, doc_type={doc_type}"
+        )
+
+        # Build Qdrant filter
+        filter_conditions = [
+            FieldCondition(key="user_id", match=MatchValue(value=user_id))
+        ]
+        if doc_type:
+            filter_conditions.append(
+                FieldCondition(key="doc_type", match=MatchValue(value=doc_type))
+            )
+
+        # Scroll through Qdrant to get all matching documents
+        qdrant_client = await get_qdrant_client()
+        collection = settings.get_collection_name()
+
+        all_points = []
+        offset = None
+
+        # Scroll through all points matching filter
+        while True:
+            scroll_result, next_offset = await qdrant_client.scroll(
+                collection_name=collection,
+                scroll_filter=Filter(must=filter_conditions),
+                limit=100,  # Batch size
+                offset=offset,
+                with_payload=["doc_id", "doc_type", "title", "excerpt", "chunk_index"],
+                with_vectors=False,  # Don't need vectors
+            )
+
+            all_points.extend(scroll_result)
+
+            if next_offset is None:
+                break
+            offset = next_offset
+
+        logger.debug(f"Retrieved {len(all_points)} points from Qdrant for fuzzy search")
+
+        # Deduplicate by (doc_id, doc_type) - keep first chunk
+        seen_docs = {}
+        for point in all_points:
+            payload = point.payload
+            # Skip points that cannot be attributed to a document instead of
+            # failing the whole search on a single malformed record.
+            if not payload or payload.get("doc_id") is None:
+                continue
+
+            doc_id = int(payload["doc_id"])
+            dtype = payload.get("doc_type", "note")
+            doc_key = (doc_id, dtype)
+
+            chunk_idx = payload.get("chunk_index", 0)
+            if doc_key not in seen_docs or chunk_idx == 0:
+                seen_docs[doc_key] = point
+
+        logger.debug(f"Deduplicated to {len(seen_docs)} unique documents")
+
+        # Score each document based on fuzzy matches
+        scored_results = []
+        query_lower = query.lower()
+
+        for doc_key, point in seen_docs.items():
+            doc_id, dtype = doc_key
+            # `or ""` also covers keys that are present but stored as null,
+            # which would otherwise break the .lower() calls below.
+            title = point.payload.get("title") or ""
+            excerpt = point.payload.get("excerpt") or ""
+
+            # Check title match
+            title_score = self._calculate_char_overlap(query_lower, title.lower())
+
+            # Check excerpt match
+            excerpt_score = self._calculate_char_overlap(query_lower, excerpt.lower())
+
+            # Use best score
+            best_score = max(title_score, excerpt_score)
+
+            if best_score >= threshold:
+                # On a tie the location is reported as "title" while the
+                # stored excerpt text is still the one returned.
+                match_location = "title" if title_score >= excerpt_score else "excerpt"
+                scored_results.append(
+                    {
+                        "doc_id": doc_id,
+                        "doc_type": dtype,
+                        "title": title,
+                        "excerpt": excerpt
+                        if excerpt_score >= title_score
+                        else f"Title match: {title}",
+                        "score": best_score,
+                        "match_location": match_location,
+                    }
+                )
+
+        # Sort by score (descending) and limit
+        scored_results.sort(key=lambda x: x["score"], reverse=True)
+        top_results = scored_results[:limit]
+
+        # Return unverified results (verification happens at output stage)
+        final_results = [
+            SearchResult(
+                id=result["doc_id"],
+                doc_type=result["doc_type"],
+                title=result["title"],
+                excerpt=result["excerpt"],
+                score=result["score"],
+                metadata={"match_location": result["match_location"]},
+            )
+            for result in top_results
+        ]
+
+        logger.info(f"Fuzzy search returned {len(final_results)} unverified results")
+        if final_results:
+            result_details = [
+                f"{r.doc_type}_{r.id} (score={r.score:.3f}, title='{r.title}')"
+                for r in final_results[:5]
+            ]
+            logger.debug(f"Top fuzzy results: {', '.join(result_details)}")
+
+        return final_results
+
+    def _calculate_char_overlap(self, query: str, text: str) -> float:
+        """Calculate character overlap ratio between query and text.
+
+        The ratio is the share of *distinct* query characters that also
+        occur somewhere in the text; character order and repetition are
+        not taken into account.
+
+        Args:
+            query: Query string (normalized)
+            text: Text to compare (normalized)
+
+        Returns:
+            Overlap ratio (0.0-1.0); 0.0 when either string is empty
+        """
+        if not query or not text:
+            return 0.0
+
+        # Convert to character sets
+        query_chars = set(query)
+        text_chars = set(text)
+
+        # Calculate overlap
+        overlap = query_chars & text_chars
+        return len(overlap) / len(query_chars)
+
+    def _extract_excerpt(self, content: str, max_length: int = 200) -> str:
+        """Extract excerpt from content.
+
+        Takes the first ``max_length`` characters, strips surrounding
+        whitespace, and appends "..." when the content was longer.
+
+        Args:
+            content: Full document content
+            max_length: Maximum excerpt length
+
+        Returns:
+            Excerpt string
+        """
+        if not content:
+            return ""
+
+        excerpt = content[:max_length].strip()
+        if len(content) > max_length:
+            excerpt += "..."
+
+        return excerpt
@@ -0,0 +1,237 @@
+"""Hybrid search algorithm using Reciprocal Rank Fusion (RRF)."""
+
+import asyncio
+import logging
+from collections import defaultdict
+from typing import Any
+
+from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.search.fuzzy import FuzzySearchAlgorithm
+from nextcloud_mcp_server.search.keyword import KeywordSearchAlgorithm
+from nextcloud_mcp_server.search.semantic import SemanticSearchAlgorithm
+
+logger = logging.getLogger(__name__)
+
+
+class HybridSearchAlgorithm(SearchAlgorithm):
+    """Hybrid search combining multiple algorithms using Reciprocal Rank Fusion.
+
+    Implements RRF from ADR-003 to combine results from:
+    - Semantic search (vector similarity)
+    - Keyword search (token matching)
+    - Fuzzy search (character overlap)
+
+    RRF formula: score = weight / (k + rank)
+    where k=60 (standard value) and rank is 1-indexed position.
+
+    A document's combined score is the sum of its per-algorithm RRF
+    contributions, rescaled by (k + 1) / total_weight so that a document
+    ranked first by every enabled algorithm scores 1.0.
+    """
+
+    DEFAULT_RRF_K = 60  # Standard RRF constant
+
+    def __init__(
+        self,
+        semantic_weight: float = 0.5,
+        keyword_weight: float = 0.3,
+        fuzzy_weight: float = 0.2,
+        rrf_k: int = DEFAULT_RRF_K,
+    ):
+        """Initialize hybrid search with algorithm weights.
+
+        Args:
+            semantic_weight: Weight for semantic results (default: 0.5)
+            keyword_weight: Weight for keyword results (default: 0.3)
+            fuzzy_weight: Weight for fuzzy results (default: 0.2)
+            rrf_k: RRF constant for rank decay (default: 60)
+
+        Raises:
+            ValueError: If any weight is negative, the weights sum to more
+                than 1.0, or all weights are zero
+        """
+        # Validate weights
+        if semantic_weight < 0 or keyword_weight < 0 or fuzzy_weight < 0:
+            raise ValueError("Weights must be non-negative")
+
+        total_weight = semantic_weight + keyword_weight + fuzzy_weight
+        if total_weight > 1.0:
+            raise ValueError(f"Weights sum to {total_weight:.2f}, must be ≤1.0")
+
+        if total_weight == 0.0:
+            raise ValueError("At least one weight must be > 0")
+
+        self.semantic_weight = semantic_weight
+        self.keyword_weight = keyword_weight
+        self.fuzzy_weight = fuzzy_weight
+        self.rrf_k = rrf_k
+        # Kept for score normalization in _reciprocal_rank_fusion
+        self.total_weight = total_weight
+
+        # Initialize sub-algorithms
+        self.semantic = SemanticSearchAlgorithm()
+        self.keyword = KeywordSearchAlgorithm()
+        self.fuzzy = FuzzySearchAlgorithm()
+
+    @property
+    def name(self) -> str:
+        """Identifier of this algorithm."""
+        return "hybrid"
+
+    @property
+    def requires_vector_db(self) -> bool:
+        """Whether the semantic sub-algorithm is enabled."""
+        # Requires vector DB if semantic search has non-zero weight
+        return self.semantic_weight > 0
+
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """Execute hybrid search using RRF to combine algorithms.
+
+        Returns unverified results from combined algorithms. Access verification
+        should be performed separately at the final output stage.
+
+        Args:
+            query: Search query
+            user_id: User ID for filtering
+            limit: Maximum results to return
+            doc_type: Optional document type filter
+            **kwargs: Additional parameters passed to sub-algorithms
+
+        Returns:
+            List of unverified SearchResult objects ranked by RRF combined score
+        """
+        logger.info(
+            f"Hybrid search: query='{query}', user={user_id}, limit={limit}, "
+            f"weights=(semantic={self.semantic_weight}, keyword={self.keyword_weight}, "
+            f"fuzzy={self.fuzzy_weight})"
+        )
+
+        weights = {
+            "semantic": self.semantic_weight,
+            "keyword": self.keyword_weight,
+            "fuzzy": self.fuzzy_weight,
+        }
+        algorithms = {
+            "semantic": self.semantic,
+            "keyword": self.keyword,
+            "fuzzy": self.fuzzy,
+        }
+
+        # Only algorithms with a positive weight are queried. Each is asked
+        # for twice the requested limit so fusion has spare candidates.
+        tasks = []
+        algo_names = []
+        for algo_name, algorithm in algorithms.items():
+            if weights[algo_name] > 0:
+                tasks.append(
+                    algorithm.search(query, user_id, limit * 2, doc_type, **kwargs)
+                )
+                algo_names.append(algo_name)
+
+        # Execute searches in parallel
+        results_list = await asyncio.gather(*tasks)
+
+        # Build results dict
+        algo_results = {}
+        for algo_name, results in zip(algo_names, results_list):
+            algo_results[algo_name] = results
+            logger.debug(f"{algo_name} returned {len(results)} results")
+
+        # Combine using RRF
+        combined_results = self._reciprocal_rank_fusion(algo_results, weights, limit)
+
+        logger.info(f"Hybrid search returned {len(combined_results)} combined results")
+        if combined_results:
+            result_details = [
+                f"{r.doc_type}_{r.id} (score={r.score:.3f}, title='{r.title}')"
+                for r in combined_results[:5]
+            ]
+            logger.debug(f"Top hybrid results: {', '.join(result_details)}")
+
+        return combined_results
+
+    def _reciprocal_rank_fusion(
+        self,
+        algo_results: dict[str, list[SearchResult]],
+        weights: dict[str, float],
+        limit: int,
+    ) -> list[SearchResult]:
+        """Combine multiple ranked result lists using RRF.
+
+        The input SearchResult objects and their metadata dicts are left
+        untouched; every returned result carries its own metadata dict.
+
+        Args:
+            algo_results: Dict of algorithm_name -> ranked results
+            weights: Dict of algorithm_name -> weight (0-1)
+            limit: Maximum results to return
+
+        Returns:
+            Combined and re-ranked results
+        """
+        # Track RRF scores per document
+        rrf_scores: dict[tuple[int, str], float] = defaultdict(float)
+        # Track best result object for each document
+        best_results: dict[tuple[int, str], SearchResult] = {}
+
+        for algo_name, results in algo_results.items():
+            weight = weights.get(algo_name, 0.0)
+            if weight == 0:
+                continue
+
+            for rank, result in enumerate(results, start=1):
+                doc_key = (result.id, result.doc_type)
+
+                # RRF formula: weight / (k + rank)
+                rrf_scores[doc_key] += weight / (self.rrf_k + rank)
+
+                # Track best result object (prefer higher original scores)
+                current_best = best_results.get(doc_key)
+                if current_best is None or result.score > current_best.score:
+                    best_results[doc_key] = result
+
+        # Sort by combined RRF score
+        sorted_docs = sorted(
+            rrf_scores.items(),
+            key=lambda x: x[1],
+            reverse=True,
+        )[:limit]
+
+        # Calculate normalization factor to scale RRF scores to 0-1 range
+        # Theoretical max RRF score = total_weight / (rrf_k + 1)
+        # Normalization factor = (rrf_k + 1) / total_weight
+        normalization_factor = (self.rrf_k + 1) / self.total_weight
+
+        # Build final results with normalized RRF scores
+        final_results = []
+        for doc_key, rrf_score in sorted_docs:
+            result = best_results[doc_key]
+
+            # Normalize RRF score to 0-1 range for better user comprehension
+            normalized_score = rrf_score * normalization_factor
+
+            # Copy the metadata before annotating it, so the RRF details do
+            # not leak back into the sub-algorithm's own result object
+            metadata = dict(result.metadata or {})
+            metadata["rrf_score_raw"] = rrf_score  # Original RRF score
+            metadata["original_score"] = result.score  # Original algorithm score
+            metadata["normalization_factor"] = normalization_factor
+
+            final_results.append(
+                SearchResult(
+                    id=result.id,
+                    doc_type=result.doc_type,
+                    title=result.title,
+                    excerpt=result.excerpt,
+                    score=normalized_score,  # Use normalized score (0-1 range)
+                    metadata=metadata,
+                )
+            )
+
+        return final_results
@@ -0,0 +1,277 @@
+"""Keyword search algorithm using token-based matching on Qdrant payload (ADR-001)."""
+
+import logging
+from typing import Any
+
+from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+class KeywordSearchAlgorithm(SearchAlgorithm):
+    """Keyword search using token-based matching with weighted scoring.
+
+    Implements token-based search from ADR-001:
+    - Title matches weighted 3x higher than content matches
+    - Case-insensitive, whitespace-delimited token matching
+    - Relevance scoring based on the share of query tokens found in the
+      title and in the excerpt
+    """
+
+    # Weighting constants from ADR-001
+    TITLE_WEIGHT = 3.0
+    CONTENT_WEIGHT = 1.0
+
+    @property
+    def name(self) -> str:
+        """Identifier of this algorithm."""
+        return "keyword"
+
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """Execute keyword search using token matching on Qdrant payload.
+
+        Queries Qdrant for all indexed documents, then scores based on token
+        matches in title and excerpt fields. Returns unverified results - access
+        verification should be performed separately at the final output stage.
+
+        Args:
+            query: Search query to tokenize and match
+            user_id: User ID for filtering
+            limit: Maximum results to return
+            doc_type: Optional document type filter (None = all types)
+            **kwargs: Additional parameters (unused)
+
+        Returns:
+            List of unverified SearchResult objects ranked by keyword match score
+        """
+        settings = get_settings()
+
+        logger.info(
+            f"Keyword search: query='{query}', user={user_id}, "
+            f"limit={limit}, doc_type={doc_type}"
+        )
+
+        # Tokenize query
+        query_tokens = self._process_query(query)
+        logger.debug(f"Query tokens: {query_tokens}")
+
+        # Build Qdrant filter
+        filter_conditions = [
+            FieldCondition(key="user_id", match=MatchValue(value=user_id))
+        ]
+        if doc_type:
+            filter_conditions.append(
+                FieldCondition(key="doc_type", match=MatchValue(value=doc_type))
+            )
+
+        # Scroll through Qdrant to get all matching documents
+        # We need title and excerpt from payload for token matching
+        qdrant_client = await get_qdrant_client()
+        collection = settings.get_collection_name()
+
+        all_points = []
+        offset = None
+
+        # Scroll through all points matching filter
+        while True:
+            scroll_result, next_offset = await qdrant_client.scroll(
+                collection_name=collection,
+                scroll_filter=Filter(must=filter_conditions),
+                limit=100,  # Batch size
+                offset=offset,
+                with_payload=[
+                    "doc_id",
+                    "doc_type",
+                    "title",
+                    "excerpt",
+                    "chunk_index",
+                    "total_chunks",
+                ],
+                with_vectors=False,  # Don't need vectors for keyword search
+            )
+
+            all_points.extend(scroll_result)
+
+            if next_offset is None:
+                break
+            offset = next_offset
+
+        logger.debug(
+            f"Retrieved {len(all_points)} points from Qdrant for keyword search"
+        )
+
+        # Deduplicate by (doc_id, doc_type) - one representative chunk per
+        # document. The first chunk (chunk_index=0) replaces any chunk stored
+        # earlier; otherwise the first chunk encountered is kept.
+        seen_docs = {}
+        for point in all_points:
+            doc_id = int(point.payload["doc_id"])
+            dtype = point.payload.get("doc_type", "note")
+            doc_key = (doc_id, dtype)
+
+            chunk_idx = point.payload.get("chunk_index", 0)
+            if doc_key not in seen_docs or chunk_idx == 0:
+                seen_docs[doc_key] = point
+
+        logger.debug(f"Deduplicated to {len(seen_docs)} unique documents")
+
+        # Score each document based on keyword matches
+        scored_results = []
+        for doc_key, point in seen_docs.items():
+            doc_id, dtype = doc_key
+            # "or" also covers a key that is present but stored as null,
+            # which would otherwise break the .lower() call during scoring
+            title = point.payload.get("title") or ""
+            excerpt = point.payload.get("excerpt") or ""
+
+            # Calculate keyword match score
+            score = self._calculate_score(query_tokens, title, excerpt)
+
+            if score > 0:  # Only include matches
+                scored_results.append(
+                    {
+                        "doc_id": doc_id,
+                        "doc_type": dtype,
+                        "title": title,
+                        "excerpt": excerpt,
+                        "score": score,
+                    }
+                )
+
+        # Sort by score (descending) and limit
+        scored_results.sort(key=lambda x: x["score"], reverse=True)
+        top_results = scored_results[:limit]
+
+        # Return unverified results (verification happens at output stage)
+        final_results = [
+            SearchResult(
+                id=result["doc_id"],
+                doc_type=result["doc_type"],
+                title=result["title"],
+                excerpt=result["excerpt"],
+                score=result["score"],
+                metadata={},
+            )
+            for result in top_results
+        ]
+
+        logger.info(f"Keyword search returned {len(final_results)} unverified results")
+        if final_results:
+            result_details = [
+                f"{r.doc_type}_{r.id} (score={r.score:.3f}, title='{r.title}')"
+                for r in final_results[:5]
+            ]
+            logger.debug(f"Top keyword results: {', '.join(result_details)}")
+
+        return final_results
+
+    def _process_query(self, query: str) -> list[str]:
+        """Tokenize and normalize query.
+
+        Args:
+            query: Raw query string
+
+        Returns:
+            Lowercased whitespace-delimited tokens longer than one character
+        """
+        # Single-character tokens are dropped
+        return [token for token in query.lower().split() if len(token) > 1]
+
+    def _calculate_score(
+        self, query_tokens: list[str], title: str, content: str
+    ) -> float:
+        """Calculate relevance score based on token matches.
+
+        Args:
+            query_tokens: List of query tokens
+            title: Document title
+            content: Document content
+
+        Returns:
+            Relevance score (0.0-1.0); 0.0 when there are no query tokens
+        """
+        if not query_tokens:
+            return 0.0
+
+        # Sets give constant-time membership checks; only whole-token
+        # equality counts as a match
+        title_tokens = set(title.lower().split())
+        content_tokens = set(content.lower().split())
+        token_count = len(query_tokens)
+
+        # Share of query tokens found in the title and in the content
+        title_matches = sum(1 for qt in query_tokens if qt in title_tokens)
+        content_matches = sum(1 for qt in query_tokens if qt in content_tokens)
+
+        score = self.TITLE_WEIGHT * (
+            title_matches / token_count
+        ) + self.CONTENT_WEIGHT * (content_matches / token_count)
+
+        # Normalize score to 0-1 range
+        # Max score would be TITLE_WEIGHT + CONTENT_WEIGHT if all tokens match everywhere
+        max_score = self.TITLE_WEIGHT + self.CONTENT_WEIGHT
+        return min(score / max_score, 1.0)
+
+    def _extract_excerpt(
+        self, content: str, query_tokens: list[str], max_length: int = 200
+    ) -> str:
+        """Extract excerpt showing match context.
+
+        Args:
+            content: Full document content
+            query_tokens: Query tokens to find
+            max_length: Maximum excerpt length in characters (ellipses excluded)
+
+        Returns:
+            Excerpt centered on the earliest token occurrence, or the start
+            of the content when no token occurs; "" for empty content
+        """
+        if not content:
+            return ""
+
+        content_lower = content.lower()
+
+        # Earliest position at which any query token occurs (substring match)
+        match_positions = [
+            pos
+            for pos in (content_lower.find(token) for token in query_tokens)
+            if pos != -1
+        ]
+
+        if not match_positions:
+            # No matches found, return beginning
+            return content[:max_length].strip() + (
+                "..." if len(content) > max_length else ""
+            )
+
+        first_match_pos = min(match_positions)
+
+        # Extract context around match
+        start = max(0, first_match_pos - max_length // 2)
+        end = min(len(content), first_match_pos + max_length // 2)
+
+        excerpt = content[start:end].strip()
+
+        # Add ellipsis if truncated
+        if start > 0:
+            excerpt = "..." + excerpt
+        if end < len(content):
+            excerpt = excerpt + "..."
+
+        return excerpt
@@ -0,0 +1,166 @@
+"""Semantic search algorithm using vector similarity (Qdrant)."""
+
+import logging
+from typing import Any
+
+from qdrant_client.models import FieldCondition, Filter, MatchValue
+
+from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.embedding import get_embedding_service
+from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
+from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+logger = logging.getLogger(__name__)
+
+
+class SemanticSearchAlgorithm(SearchAlgorithm):
+    """Semantic search using vector similarity in Qdrant.
+
+    Embeds the query with the configured embedding service and asks Qdrant
+    for the closest stored points, so documents match by meaning rather
+    than by exact keywords.
+    """
+
+    def __init__(self, score_threshold: float = 0.7):
+        """Initialize semantic search algorithm.
+
+        Args:
+            score_threshold: Minimum similarity score (default: 0.7)
+        """
+        self.score_threshold = score_threshold
+
+    @property
+    def name(self) -> str:
+        """Identifier of this algorithm."""
+        return "semantic"
+
+    @property
+    def requires_vector_db(self) -> bool:
+        """Semantic search always needs the vector database."""
+        return True
+
+    async def search(
+        self,
+        query: str,
+        user_id: str,
+        limit: int = 10,
+        doc_type: str | None = None,
+        **kwargs: Any,
+    ) -> list[SearchResult]:
+        """Run a vector-similarity search and return one hit per document.
+
+        Results are unverified: access checks are expected to happen
+        separately at the final output stage.
+
+        Args:
+            query: Natural language search query
+            user_id: User ID for filtering
+            limit: Maximum results to return
+            doc_type: Optional document type filter
+            **kwargs: Additional parameters; "score_threshold" overrides the
+                instance default
+
+        Returns:
+            List of unverified SearchResult objects in the order Qdrant
+            returned them, deduplicated by (doc_id, doc_type)
+
+        Raises:
+            Exception: Any error raised by the Qdrant query is re-raised
+                after the error metric has been recorded
+        """
+        settings = get_settings()
+        min_score = kwargs.get("score_threshold", self.score_threshold)
+
+        logger.info(
+            f"Semantic search: query='{query}', user={user_id}, "
+            f"limit={limit}, score_threshold={min_score}, doc_type={doc_type}"
+        )
+
+        # Turn the query text into a vector
+        query_embedding = await get_embedding_service().embed(query)
+        logger.debug(
+            f"Generated embedding for query (dimension={len(query_embedding)})"
+        )
+
+        # Restrict to the user's points, and to one doc type when requested
+        must_conditions = [
+            FieldCondition(key="user_id", match=MatchValue(value=user_id))
+        ]
+        if doc_type:
+            must_conditions.append(
+                FieldCondition(key="doc_type", match=MatchValue(value=doc_type))
+            )
+
+        qdrant_client = await get_qdrant_client()
+        try:
+            response = await qdrant_client.query_points(
+                collection_name=settings.get_collection_name(),
+                query=query_embedding,
+                query_filter=Filter(must=must_conditions),
+                limit=limit * 2,  # Headroom: several chunks may share a document
+                score_threshold=min_score,
+                with_payload=True,
+                with_vectors=False,  # Vectors are not needed in the response
+            )
+            record_qdrant_operation("search", "success")
+        except Exception:
+            record_qdrant_operation("search", "error")
+            raise
+
+        points = response.points
+        logger.info(
+            f"Qdrant returned {len(points)} results "
+            f"(before deduplication)"
+        )
+
+        if points:
+            # The leading scores are logged to help with threshold tuning
+            top_scores = [point.score for point in points[:3]]
+            logger.debug(f"Top 3 similarity scores: {top_scores}")
+
+        # A document can appear once per chunk; keep only its first hit
+        emitted_keys = set()
+        results = []
+
+        for point in points:
+            payload = point.payload
+            hit_id = int(payload["doc_id"])
+            hit_type = payload.get("doc_type", "note")
+            hit_key = (hit_id, hit_type)
+
+            if hit_key in emitted_keys:
+                continue
+            emitted_keys.add(hit_key)
+
+            # Unverified result (verification happens at output stage)
+            results.append(
+                SearchResult(
+                    id=hit_id,
+                    doc_type=hit_type,
+                    title=payload.get("title", "Untitled"),
+                    excerpt=payload.get("excerpt", ""),
+                    score=point.score,
+                    metadata={
+                        "chunk_index": payload.get("chunk_index"),
+                        "total_chunks": payload.get("total_chunks"),
+                    },
+                )
+            )
+
+            if len(results) >= limit:
+                break
+
+        logger.info(f"Returning {len(results)} unverified results after deduplication")
+        if results:
+            result_details = [
+                f"{r.doc_type}_{r.id} (score={r.score:.3f}, title='{r.title}')"
+                for r in results[:5]  # Show top 5
+            ]
+            logger.debug(f"Top results: {', '.join(result_details)}")
+
+        return results
@@ -0,0 +1,122 @@
+"""Access verification for search results.
+
+This module provides centralized verification of Nextcloud access permissions
+for search results. Verification happens at the final output stage (MCP tool/viz endpoint)
+rather than within individual search algorithms, preventing redundant API calls.
+
+Key benefits:
+- Deduplication: Each document verified exactly once (even in hybrid mode)
+- Parallel execution: All verifications run concurrently via anyio task groups
+- Separation of concerns: Algorithms handle scoring, this module handles security
+"""
+
+import logging
+from dataclasses import replace
+from typing import Protocol
+
+import anyio
+
+from nextcloud_mcp_server.search.algorithms import SearchResult
+
+logger = logging.getLogger(__name__)
+
+
+class NextcloudClientProtocol(Protocol):
+    """Structural type for the Nextcloud client used during verification.
+
+    Any object exposing a ``notes`` property satisfies this protocol.
+    """
+
+    @property
+    def notes(self):
+        """Client for the notes API."""
+        ...
+
+
+async def verify_search_results(
+    results: list[SearchResult],
+    nextcloud_client: NextcloudClientProtocol,
+) -> list[SearchResult]:
+    """
+    Filter search results down to the documents that pass an access check.
+
+    Duplicates on (doc_id, doc_type) are collapsed to their first occurrence,
+    the remaining documents are checked concurrently in an anyio task group,
+    and documents whose check raised are dropped. Surviving results keep the
+    relative order they had in the input.
+
+    Notes are checked by fetching them through ``nextcloud_client.notes``;
+    the fetched data is merged into the result's metadata. Other document
+    types are passed through without a check.
+
+    Args:
+        results: Unverified search results
+        nextcloud_client: Nextcloud client for access checks
+
+    Returns:
+        Verified and accessible results (same order as input)
+
+    Example:
+        >>> unverified = await search_algo.search(query="test", limit=10)
+        >>> verified = await verify_search_results(unverified, client)
+        >>> # verified contains only documents that passed the check
+    """
+    # setdefault keeps the first result for each key, and dict insertion
+    # order preserves the original ranking of those first occurrences
+    first_by_key = {}
+    for candidate in results:
+        first_by_key.setdefault((candidate.id, candidate.doc_type), candidate)
+    unique_results = list(first_by_key.values())
+
+    if not unique_results:
+        return []
+
+    logger.debug(
+        f"Verifying access for {len(unique_results)} unique documents "
+        f"(from {len(results)} total results)"
+    )
+
+    # One slot per document: tasks may finish in any order, so each writes
+    # to its own index and the input order is kept
+    verified_results = [None] * len(unique_results)
+
+    async def verify_one(index: int, result: SearchResult):
+        """
+        Check one document and record the outcome in its slot.
+
+        Args:
+            index: Position in verified_results list
+            result: Search result to verify
+        """
+        try:
+            if result.doc_type != "note":
+                # TODO: Add verification for other doc types (calendar, deck, file, etc.)
+                # Until then, non-note documents are assumed accessible
+                logger.debug(
+                    f"No verification implemented for doc_type={result.doc_type}, "
+                    "assuming accessible"
+                )
+                verified_results[index] = result
+            else:
+                # A successful fetch is the access check; its data also
+                # refreshes the metadata, overriding existing keys
+                note = await nextcloud_client.notes.get_note(result.id)
+                merged_metadata = {**(result.metadata or {}), **note}
+                verified_results[index] = replace(result, metadata=merged_metadata)
+
+        except Exception as e:
+            # Any failure is treated as "inaccessible"; logged at debug
+            # level because filtered-out documents are expected
+            logger.debug(f"Document {result.doc_type}/{result.id} not accessible: {e}")
+            verified_results[index] = None
+
+    # The task group waits for every check to finish before continuing
+    async with anyio.create_task_group() as tg:
+        for position, unique_result in enumerate(unique_results):
+            tg.start_soon(verify_one, position, unique_result)
+
+    # Slots still holding None belong to documents that failed the check
+    accessible = [entry for entry in verified_results if entry is not None]
+
+    logger.debug(
+        f"Verification complete: {len(accessible)} accessible, "
+        f"{len(unique_results) - len(accessible)} filtered out"
+    )
+
+    return accessible
@@ -12,6 +12,7 @@ from nextcloud_mcp_server.models.calendar import (
    ListTodosResponse,
    Todo,
 )
+from nextcloud_mcp_server.observability.metrics import instrument_tool

 logger = logging.getLogger(__name__)

@@ -20,6 +21,7 @@ def configure_calendar_tools(mcp: FastMCP):
    # Calendar tools
    @mcp.tool()
    @require_scopes("calendar:read")
+    @instrument_tool
    async def nc_calendar_list_calendars(ctx: Context) -> ListCalendarsResponse:
        """List all available calendars for the user"""
        client = await get_client(ctx)
@@ -30,6 +32,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:write")
+    @instrument_tool
    async def nc_calendar_create_event(
        calendar_name: str,
        title: str,
@@ -106,6 +109,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:read")
+    @instrument_tool
    async def nc_calendar_list_events(
        calendar_name: str,
        ctx: Context,
@@ -208,6 +212,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:read")
+    @instrument_tool
    async def nc_calendar_get_event(
        calendar_name: str,
        event_uid: str,
@@ -220,6 +225,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:write")
+    @instrument_tool
    async def nc_calendar_update_event(
        calendar_name: str,
        event_uid: str,
@@ -293,6 +299,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:write")
+    @instrument_tool
    async def nc_calendar_delete_event(
        calendar_name: str,
        event_uid: str,
@@ -304,6 +311,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:write")
+    @instrument_tool
    async def nc_calendar_create_meeting(
        title: str,
        date: str,
@@ -370,6 +378,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:read")
+    @instrument_tool
    async def nc_calendar_get_upcoming_events(
        ctx: Context,
        calendar_name: str = "",  # Empty = all calendars
@@ -420,6 +429,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:read")
+    @instrument_tool
    async def nc_calendar_find_availability(
        duration_minutes: int,
        ctx: Context,
@@ -500,6 +510,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:write")
+    @instrument_tool
    async def nc_calendar_bulk_operations(
        operation: str,  # "update", "delete", "move"
        ctx: Context,
@@ -749,6 +760,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("calendar:write")
+    @instrument_tool
    async def nc_calendar_manage_calendar(
        action: str,  # "create", "delete", "update", "list"
        ctx: Context,
@@ -818,6 +830,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("todo:read", "calendar:read")
+    @instrument_tool
    async def nc_calendar_list_todos(
        calendar_name: str,
        ctx: Context,
@@ -863,6 +876,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("todo:write", "calendar:read")
+    @instrument_tool
    async def nc_calendar_create_todo(
        calendar_name: str,
        summary: str,
@@ -906,6 +920,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("todo:write", "calendar:read")
+    @instrument_tool
    async def nc_calendar_update_todo(
        calendar_name: str,
        todo_uid: str,
@@ -966,6 +981,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("todo:write", "calendar:read")
+    @instrument_tool
    async def nc_calendar_delete_todo(
        calendar_name: str,
        todo_uid: str,
@@ -986,6 +1002,7 @@ def configure_calendar_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("todo:read", "calendar:read")
+    @instrument_tool
    async def nc_calendar_search_todos(
        ctx: Context,
        status: Optional[str] = None,
@@ -4,6 +4,7 @@ from mcp.server.fastmcp import Context, FastMCP

 from nextcloud_mcp_server.auth import require_scopes
 from nextcloud_mcp_server.context import get_client
+from nextcloud_mcp_server.observability.metrics import instrument_tool

 logger = logging.getLogger(__name__)

@@ -12,6 +13,7 @@ def configure_contacts_tools(mcp: FastMCP):
    # Contacts tools
    @mcp.tool()
    @require_scopes("contacts:read")
+    @instrument_tool
    async def nc_contacts_list_addressbooks(ctx: Context):
        """List all addressbooks for the user."""
        client = await get_client(ctx)
@@ -19,6 +21,7 @@ def configure_contacts_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("contacts:read")
+    @instrument_tool
    async def nc_contacts_list_contacts(ctx: Context, *, addressbook: str):
        """List all contacts in the specified addressbook."""
        client = await get_client(ctx)
@@ -26,6 +29,7 @@ def configure_contacts_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("contacts:write")
+    @instrument_tool
    async def nc_contacts_create_addressbook(
        ctx: Context, *, name: str, display_name: str
    ):
@@ -42,6 +46,7 @@ def configure_contacts_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("contacts:write")
+    @instrument_tool
    async def nc_contacts_delete_addressbook(ctx: Context, *, name: str):
        """Delete an addressbook."""
        client = await get_client(ctx)
@@ -49,6 +54,7 @@ def configure_contacts_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("contacts:write")
+    @instrument_tool
    async def nc_contacts_create_contact(
        ctx: Context, *, addressbook: str, uid: str, contact_data: dict
    ):
@@ -66,6 +72,7 @@ def configure_contacts_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("contacts:write")
+    @instrument_tool
    async def nc_contacts_delete_contact(ctx: Context, *, addressbook: str, uid: str):
        """Delete a contact."""
        client = await get_client(ctx)
@@ -73,6 +80,7 @@ def configure_contacts_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("contacts:write")
+    @instrument_tool
    async def nc_contacts_update_contact(
        ctx: Context, *, addressbook: str, uid: str, contact_data: dict, etag: str = ""
    ):
@@ -24,6 +24,7 @@ from nextcloud_mcp_server.models.cookbook import (
    UpdateRecipeResponse,
    Version,
 )
+from nextcloud_mcp_server.observability.metrics import instrument_tool

 logger = logging.getLogger(__name__)

@@ -72,6 +73,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:write")
+    @instrument_tool
    async def nc_cookbook_import_recipe(url: str, ctx: Context) -> ImportRecipeResponse:
        """Import a recipe from a URL using schema.org metadata.

@@ -129,6 +131,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:read")
+    @instrument_tool
    async def nc_cookbook_list_recipes(ctx: Context) -> ListRecipesResponse:
        """Get all recipes in the database"""
        client = await get_client(ctx)
@@ -154,6 +157,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:read")
+    @instrument_tool
    async def nc_cookbook_get_recipe(recipe_id: int, ctx: Context) -> Recipe:
        """Get a specific recipe by its ID"""
        client = await get_client(ctx)
@@ -179,6 +183,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:write")
+    @instrument_tool
    async def nc_cookbook_create_recipe(
        name: str,
        description: str | None = None,
@@ -258,6 +263,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:write")
+    @instrument_tool
    async def nc_cookbook_update_recipe(
        recipe_id: int,
        name: str | None = None,
@@ -347,6 +353,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:write")
+    @instrument_tool
    async def nc_cookbook_delete_recipe(
        recipe_id: int, ctx: Context
    ) -> DeleteRecipeResponse:
@@ -382,6 +389,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:read")
+    @instrument_tool
    async def nc_cookbook_search_recipes(
        query: str, ctx: Context
    ) -> SearchRecipesResponse:
@@ -418,6 +426,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:read")
+    @instrument_tool
    async def nc_cookbook_list_categories(ctx: Context) -> ListCategoriesResponse:
        """Get all known categories.

@@ -445,6 +454,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:read")
+    @instrument_tool
    async def nc_cookbook_get_recipes_in_category(
        category: str, ctx: Context
    ) -> ListRecipesResponse:
@@ -481,6 +491,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:read")
+    @instrument_tool
    async def nc_cookbook_list_keywords(ctx: Context) -> ListKeywordsResponse:
        """Get all known keywords/tags"""
        client = await get_client(ctx)
@@ -506,6 +517,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:read")
+    @instrument_tool
    async def nc_cookbook_get_recipes_with_keywords(
        keywords: list[str], ctx: Context
    ) -> ListRecipesResponse:
@@ -540,6 +552,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:write")
+    @instrument_tool
    async def nc_cookbook_set_config(
        folder: str | None = None,
        update_interval: int | None = None,
@@ -583,6 +596,7 @@ def configure_cookbook_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("cookbook:write")
+    @instrument_tool
    async def nc_cookbook_reindex(ctx: Context) -> ReindexResponse:
        """Trigger a rescan of all recipes into the caching database.

@@ -18,6 +18,7 @@ from nextcloud_mcp_server.models.deck import (
    LabelOperationResponse,
    StackOperationResponse,
 )
+from nextcloud_mcp_server.observability.metrics import instrument_tool

 logger = logging.getLogger(__name__)

@@ -118,6 +119,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_boards(ctx: Context) -> list[DeckBoard]:
        """Get all Nextcloud Deck boards"""
        client = await get_client(ctx)
@@ -126,6 +128,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_board(ctx: Context, board_id: int) -> DeckBoard:
        """Get details of a specific Nextcloud Deck board"""
        client = await get_client(ctx)
@@ -134,6 +137,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_stacks(ctx: Context, board_id: int) -> list[DeckStack]:
        """Get all stacks in a Nextcloud Deck board"""
        client = await get_client(ctx)
@@ -142,6 +146,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_stack(ctx: Context, board_id: int, stack_id: int) -> DeckStack:
        """Get details of a specific Nextcloud Deck stack"""
        client = await get_client(ctx)
@@ -150,6 +155,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_cards(
        ctx: Context, board_id: int, stack_id: int
    ) -> list[DeckCard]:
@@ -162,6 +168,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int
    ) -> DeckCard:
@@ -172,6 +179,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_labels(ctx: Context, board_id: int) -> list[DeckLabel]:
        """Get all labels in a Nextcloud Deck board"""
        client = await get_client(ctx)
@@ -180,6 +188,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:read")
+    @instrument_tool
    async def deck_get_label(ctx: Context, board_id: int, label_id: int) -> DeckLabel:
        """Get details of a specific Nextcloud Deck label"""
        client = await get_client(ctx)
@@ -190,6 +199,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_create_board(
        ctx: Context, title: str, color: str
    ) -> CreateBoardResponse:
@@ -207,6 +217,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_create_stack(
        ctx: Context, board_id: int, title: str, order: int
    ) -> CreateStackResponse:
@@ -223,6 +234,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_update_stack(
        ctx: Context,
        board_id: int,
@@ -249,6 +261,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_delete_stack(
        ctx: Context, board_id: int, stack_id: int
    ) -> StackOperationResponse:
@@ -270,6 +283,7 @@ def configure_deck_tools(mcp: FastMCP):
    # Card Tools
    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_create_card(
        ctx: Context,
        board_id: int,
@@ -304,6 +318,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_update_card(
        ctx: Context,
        board_id: int,
@@ -357,6 +372,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_delete_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int
    ) -> CardOperationResponse:
@@ -379,6 +395,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_archive_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int
    ) -> CardOperationResponse:
@@ -401,6 +418,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_unarchive_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int
    ) -> CardOperationResponse:
@@ -423,6 +441,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_reorder_card(
        ctx: Context,
        board_id: int,
@@ -455,6 +474,7 @@ def configure_deck_tools(mcp: FastMCP):
    # Label Tools
    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_create_label(
        ctx: Context, board_id: int, title: str, color: str
    ) -> CreateLabelResponse:
@@ -471,6 +491,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_update_label(
        ctx: Context,
        board_id: int,
@@ -497,6 +518,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_delete_label(
        ctx: Context, board_id: int, label_id: int
    ) -> LabelOperationResponse:
@@ -518,6 +540,7 @@ def configure_deck_tools(mcp: FastMCP):
    # Card-Label Assignment Tools
    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_assign_label_to_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int, label_id: int
    ) -> CardOperationResponse:
@@ -541,6 +564,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_remove_label_from_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int, label_id: int
    ) -> CardOperationResponse:
@@ -565,6 +589,7 @@ def configure_deck_tools(mcp: FastMCP):
    # Card-User Assignment Tools
    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_assign_user_to_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int, user_id: str
    ) -> CardOperationResponse:
@@ -588,6 +613,7 @@ def configure_deck_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("deck:write")
+    @instrument_tool
    async def deck_unassign_user_from_card(
        ctx: Context, board_id: int, stack_id: int, card_id: int, user_id: str
    ) -> CardOperationResponse:
@@ -17,6 +17,7 @@ from nextcloud_mcp_server.models.notes import (
    SearchNotesResponse,
    UpdateNoteResponse,
 )
+from nextcloud_mcp_server.observability.metrics import instrument_tool

 logger = logging.getLogger(__name__)

@@ -86,6 +87,7 @@ def configure_notes_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("notes:write")
+    @instrument_tool
    async def nc_notes_create_note(
        title: str, content: str, category: str, ctx: Context
    ) -> CreateNoteResponse:
@@ -132,6 +134,7 @@ def configure_notes_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("notes:write")
+    @instrument_tool
    async def nc_notes_update_note(
        note_id: int,
        etag: str,
@@ -197,6 +200,7 @@ def configure_notes_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("notes:write")
+    @instrument_tool
    async def nc_notes_append_content(
        note_id: int, content: str, ctx: Context
    ) -> AppendContentResponse:
@@ -247,6 +251,7 @@ def configure_notes_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("notes:read")
+    @instrument_tool
    async def nc_notes_search_notes(query: str, ctx: Context) -> SearchNotesResponse:
        """Search notes by title or content, returning only id, title, and category (requires notes:read scope)."""
        client = await get_client(ctx)
@@ -293,6 +298,7 @@ def configure_notes_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("notes:read")
+    @instrument_tool
    async def nc_notes_get_note(note_id: int, ctx: Context) -> Note:
        """Get a specific note by its ID (requires notes:read scope)"""
        client = await get_client(ctx)
@@ -322,6 +328,7 @@ def configure_notes_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("notes:read")
+    @instrument_tool
    async def nc_notes_get_attachment(
        note_id: int, attachment_filename: str, ctx: Context
    ) -> dict[str, str]:
@@ -368,6 +375,7 @@ def configure_notes_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("notes:write")
+    @instrument_tool
    async def nc_notes_delete_note(note_id: int, ctx: Context) -> DeleteNoteResponse:
        """Delete a note permanently"""
        logger.info("Deleting note %s", note_id)
@@ -1,8 +1,9 @@
 """Semantic search MCP tools using vector database."""

 import logging
+from typing import Literal

-from httpx import HTTPStatusError, RequestError
+from httpx import RequestError
 from mcp.server.fastmcp import Context, FastMCP
 from mcp.shared.exceptions import McpError
 from mcp.types import (
@@ -21,6 +22,15 @@ from nextcloud_mcp_server.models.semantic import (
    SemanticSearchResult,
    VectorSyncStatusResponse,
 )
+from nextcloud_mcp_server.observability.metrics import (
+    instrument_tool,
+)
+from nextcloud_mcp_server.search import (
+    FuzzySearchAlgorithm,
+    HybridSearchAlgorithm,
+    KeywordSearchAlgorithm,
+    SemanticSearchAlgorithm,
+)

 logger = logging.getLogger(__name__)

@@ -30,184 +40,185 @@ def configure_semantic_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("semantic:read")
+    @instrument_tool
    async def nc_semantic_search(
-        query: str, ctx: Context, limit: int = 10, score_threshold: float = 0.7
+        query: str,
+        ctx: Context,
+        limit: int = 10,
+        doc_types: list[str] | None = None,
+        score_threshold: float = 0.7,
+        algorithm: Literal["semantic", "keyword", "fuzzy", "hybrid"] = "hybrid",
+        semantic_weight: float = 0.5,
+        keyword_weight: float = 0.3,
+        fuzzy_weight: float = 0.2,
    ) -> SemanticSearchResponse:
        """
-        Semantic search across all indexed Nextcloud apps using vector embeddings.
+        Search Nextcloud content using configurable algorithms with cross-app support.

-        Searches documents by meaning rather than exact keywords across notes, calendar
-        events, deck cards, files, and contacts. Requires vector database synchronization
-        to be enabled (VECTOR_SYNC_ENABLED=true).
+        Supports multiple search algorithms with client-configurable weighting:
+        - semantic: Vector similarity search (requires VECTOR_SYNC_ENABLED=true)
+        - keyword: Token-based matching (title matches weighted 3x)
+        - fuzzy: Character overlap matching (typo-tolerant)
+        - hybrid: Combines all algorithms using Reciprocal Rank Fusion (default)
+
+        Document types are queried from the vector database to determine what's
+        actually indexed. Currently only "note" documents are fully supported.

        Args:
            query: Natural language search query
            limit: Maximum number of results to return (default: 10)
-            score_threshold: Minimum similarity score (0-1, default: 0.7)
+            doc_types: Document types to search (e.g., ["note", "file"]). None = search all indexed types (default)
+            score_threshold: Minimum similarity score for semantic/hybrid (0-1, default: 0.7)
+            algorithm: Search algorithm to use (default: "hybrid")
+            semantic_weight: Weight for semantic results in hybrid mode (default: 0.5)
+            keyword_weight: Weight for keyword results in hybrid mode (default: 0.3)
+            fuzzy_weight: Weight for fuzzy results in hybrid mode (default: 0.2)

        Returns:
-            SemanticSearchResponse with matching documents and similarity scores
+            SemanticSearchResponse with matching documents and relevance scores
+
+        Raises:
+            McpError: If the chosen algorithm needs vector sync while it is
+                disabled, the algorithm is unknown, or the search fails.
        """
-        from qdrant_client.models import FieldCondition, Filter, MatchValue
-
        from nextcloud_mcp_server.config import get_settings
-        from nextcloud_mcp_server.embedding import get_embedding_service
-        from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

        settings = get_settings()
-
-        # Check if vector sync is enabled
-        if not settings.vector_sync_enabled:
-            raise McpError(
-                ErrorData(
-                    code=-1,
-                    message="Semantic search is not enabled. Set VECTOR_SYNC_ENABLED=true and ensure vector database is configured.",
-                )
-            )
-
        client = await get_client(ctx)
        username = client.username

        logger.info(
-            f"Semantic search: query='{query}', user={username}, "
+            f"Search: query='{query}', user={username}, algorithm={algorithm}, "
            f"limit={limit}, score_threshold={score_threshold}"
        )

        try:
-            # Generate embedding for query
-            embedding_service = get_embedding_service()
-            query_embedding = await embedding_service.embed(query)
-            logger.debug(
-                f"Generated embedding for query (dimension={len(query_embedding)})"
-            )
-
-            # Search Qdrant with user filtering
-            # Note: Currently only searching notes (doc_type="note")
-            # Future: Remove doc_type filter to search all apps
-            qdrant_client = await get_qdrant_client()
-            search_response = await qdrant_client.query_points(
-                collection_name=settings.get_collection_name(),
-                query=query_embedding,
-                query_filter=Filter(
-                    must=[
-                        FieldCondition(
-                            key="user_id",
-                            match=MatchValue(value=username),
-                        ),
-                        FieldCondition(
-                            key="doc_type",
-                            match=MatchValue(value="note"),
-                        ),
-                    ]
-                ),
-                limit=limit * 2,  # Get extra for filtering
-                score_threshold=score_threshold,
-                with_payload=True,
-                with_vectors=False,  # Don't return vectors to save bandwidth
-            )
-
-            logger.info(
-                f"Qdrant returned {len(search_response.points)} results "
-                f"(before deduplication and access verification)"
-            )
-            if search_response.points:
-                # Log top 3 scores to help with threshold tuning
-                top_scores = [p.score for p in search_response.points[:3]]
-                logger.debug(f"Top 3 similarity scores: {top_scores}")
-
-            # Deduplicate by document ID (multiple chunks per document)
-            seen_doc_ids = set()
-            results = []
-
-            for result in search_response.points:
-                doc_id = int(result.payload["doc_id"])
-                doc_type = result.payload.get("doc_type", "note")
-
-                # Skip if we've already seen this document
-                if doc_id in seen_doc_ids:
-                    continue
-
-                seen_doc_ids.add(doc_id)
-
-                # Verify access via Nextcloud API (dual-phase authorization)
-                # Currently only supports notes, will be extended to other apps
-                if doc_type == "note":
-                    try:
-                        note = await client.notes.get_note(doc_id)
-
-                        results.append(
-                            SemanticSearchResult(
-                                id=doc_id,
-                                doc_type="note",
-                                title=result.payload["title"],
-                                category=note.get("category", ""),
-                                excerpt=result.payload["excerpt"],
-                                score=result.score,
-                                chunk_index=result.payload["chunk_index"],
-                                total_chunks=result.payload["total_chunks"],
-                            )
+            # Create appropriate algorithm instance
+            if algorithm == "semantic":
+                if not settings.vector_sync_enabled:
+                    raise McpError(
+                        ErrorData(
+                            code=-1,
+                            message="Semantic search requires VECTOR_SYNC_ENABLED=true",
                        )
+                    )
+                search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
+            elif algorithm == "keyword":
+                search_algo = KeywordSearchAlgorithm()
+            elif algorithm == "fuzzy":
+                search_algo = FuzzySearchAlgorithm()
+            elif algorithm == "hybrid":
+                if semantic_weight > 0 and not settings.vector_sync_enabled:
+                    raise McpError(
+                        ErrorData(
+                            code=-1,
+                            message="Hybrid search with semantic component requires VECTOR_SYNC_ENABLED=true",
+                        )
+                    )
+                search_algo = HybridSearchAlgorithm(
+                    semantic_weight=semantic_weight,
+                    keyword_weight=keyword_weight,
+                    fuzzy_weight=fuzzy_weight,
+                )
+            else:
+                raise McpError(
+                    ErrorData(code=-1, message=f"Unknown algorithm: {algorithm}")
+                )

-                        if len(results) >= limit:
-                            break
+            # Execute search across requested document types
+            # If doc_types is None, search all indexed types (cross-app search)
+            # If doc_types is a list, search only those types
+            all_results = []

-                    except HTTPStatusError as e:
-                        if e.response.status_code == 403:
-                            # User lost access, skip this document
-                            logger.debug(f"Skipping note {doc_id}: access denied (403)")
-                            continue
-                        elif e.response.status_code == 404:
-                            # Document was deleted but not yet removed from vector DB
-                            logger.debug(
-                                f"Skipping note {doc_id}: not found (404), "
-                                f"likely deleted after indexing"
-                            )
-                            continue
-                        else:
-                            # Log other errors but continue processing
-                            logger.warning(
-                                f"Error verifying access to note {doc_id}: {e.response.status_code}"
-                            )
-                            continue
+            if doc_types is None:
+                # Cross-app search: search all indexed types
+                # Get unverified results from Qdrant
+                unverified_results = await search_algo.search(
+                    query=query,
+                    user_id=username,
+                    limit=limit * 2,  # Get extra for access filtering
+                    doc_type=None,  # Signal to search all types
+                    score_threshold=score_threshold,
+                )
+                all_results.extend(unverified_results)
+            else:
+                # Search specific document types
+                # For each requested type, execute search and combine results
+                for dtype in doc_types:
+                    unverified_results = await search_algo.search(
+                        query=query,
+                        user_id=username,
+                        limit=limit * 2,  # Get extra for combining and filtering
+                        doc_type=dtype,
+                        score_threshold=score_threshold,
+                    )
+                    all_results.extend(unverified_results)

-            logger.info(
-                f"Returning {len(results)} results after deduplication and access verification"
-            )
-            if results:
-                result_details = [
-                    f"note_{r.id} (score={r.score:.3f}, title='{r.title}')"
-                    for r in results[:5]  # Show top 5
-                ]
-                logger.debug(f"Top results: {', '.join(result_details)}")
+                # Sort combined results by score
+                all_results.sort(key=lambda r: r.score, reverse=True)
+
+            # Verify access for all results (deduplicates and filters)
+            from nextcloud_mcp_server.search.verification import verify_search_results
+
+            verified_results = await verify_search_results(all_results, client)
+            search_results = verified_results[:limit]  # Final limit after verification
+
+            # Convert SearchResult objects to SemanticSearchResult for response
+            results = []
+            for r in search_results:
+                results.append(
+                    SemanticSearchResult(
+                        id=r.id,
+                        doc_type=r.doc_type,
+                        title=r.title,
+                        category=r.metadata.get("category", "") if r.metadata else "",
+                        excerpt=r.excerpt,
+                        score=r.score,
+                        chunk_index=r.metadata.get("chunk_index", 0)
+                        if r.metadata
+                        else 0,
+                        total_chunks=r.metadata.get("total_chunks", 1)
+                        if r.metadata
+                        else 1,
+                    )
+                )
+
+            logger.info(f"Returning {len(results)} results from {algorithm} search")

            return SemanticSearchResponse(
                results=results,
                query=query,
                total_found=len(results),
-                search_method="semantic",
+                search_method=algorithm,
            )

+        except McpError:
+            # Already a deliberate, client-facing error; re-raise as-is so the
+            # catch-all below does not log it or prefix "Search failed:".
+            raise
        except ValueError as e:
-            if "No embedding provider configured" in str(e):
+            error_msg = str(e)
+            if "No embedding provider configured" in error_msg:
                raise McpError(
                    ErrorData(
                        code=-1,
                        message="Embedding service not configured. Set OLLAMA_BASE_URL environment variable.",
                    )
                )
-            raise McpError(ErrorData(code=-1, message=f"Configuration error: {str(e)}"))
+            raise McpError(
+                ErrorData(code=-1, message=f"Configuration error: {error_msg}")
+            )
        except RequestError as e:
            raise McpError(
                ErrorData(code=-1, message=f"Network error during search: {str(e)}")
            )
        except Exception as e:
-            logger.error(f"Semantic search error: {e}", exc_info=True)
-            raise McpError(
-                ErrorData(code=-1, message=f"Semantic search failed: {str(e)}")
-            )
+            logger.error(f"Search error: {e}", exc_info=True)
+            raise McpError(ErrorData(code=-1, message=f"Search failed: {str(e)}"))

    @mcp.tool()
    @require_scopes("semantic:read")
+    @instrument_tool
    async def nc_semantic_search_answer(
        query: str,
        ctx: Context,
@@ -331,21 +334,71 @@ def configure_semantic_tools(mcp: FastMCP):
                success=True,
            )

-        # 4. Construct context from retrieved documents
+        # 4. Fetch full content for notes to provide complete context to LLM
+        # Filter out inaccessible notes (deleted or permissions changed)
+        client = await get_client(ctx)
+        accessible_results = []
+        full_contents = []  # Full content for accessible notes
+
+        for result in search_response.results:
+            if result.doc_type == "note":
+                try:
+                    note = await client.notes.get_note(result.id)
+                    # Note is accessible, store full content
+                    accessible_results.append(result)
+                    full_contents.append(note.get("content", ""))
+                    logger.debug(
+                        f"Fetched full content for note {result.id} "
+                        f"(length: {len(full_contents[-1])} chars)"
+                    )
+                except Exception as e:
+                    # Note might have been deleted or permissions changed
+                    # Filter it out to avoid corrupting LLM with inaccessible data
+                    logger.warning(
+                        f"Failed to fetch full content for note {result.id}: {e}. "
+                        f"Excluding from results."
+                    )
+            else:
+                # Non-note document types (future: calendar, deck, files)
+                # For now, keep them with excerpts
+                accessible_results.append(result)
+                full_contents.append(None)
+
+        # Check if we filtered out all results
+        if not accessible_results:
+            logger.warning(f"All search results became inaccessible for query: {query}")
+            return SamplingSearchResponse(
+                query=query,
+                generated_answer="All matching documents are no longer accessible.",
+                sources=[],
+                total_found=0,
+                search_method="semantic_sampling",
+                success=True,
+            )
+
+        # 5. Construct context from accessible documents with full content
        context_parts = []
-        for idx, result in enumerate(search_response.results, 1):
+        for idx, (result, content) in enumerate(
+            zip(accessible_results, full_contents), 1
+        ):
+            # Use full content if available (notes), otherwise use excerpt
+            if content is not None:
+                content_field = f"Content: {content}"
+            else:
+                content_field = f"Excerpt: {result.excerpt}"
+
            context_parts.append(
                f"[Document {idx}]\n"
                f"Type: {result.doc_type}\n"
                f"Title: {result.title}\n"
                f"Category: {result.category}\n"
-                f"Excerpt: {result.excerpt}\n"
+                f"{content_field}\n"
                f"Relevance Score: {result.score:.2f}\n"
            )

        context = "\n".join(context_parts)

-        # 5. Construct prompt - reuse user's query, add context and instructions
+        # 6. Construct prompt - reuse user's query, add context and instructions
        prompt = (
            f"{query}\n\n"
            f"Here are relevant documents from Nextcloud (notes, calendar events, deck cards, files, contacts):\n\n"
@@ -401,8 +454,8 @@ def configure_semantic_tools(mcp: FastMCP):
            return SamplingSearchResponse(
                query=query,
                generated_answer=generated_answer,
-                sources=search_response.results,
-                total_found=search_response.total_found,
+                sources=accessible_results,
+                total_found=len(accessible_results),
                search_method="semantic_sampling",
                model_used=sampling_result.model,
                stop_reason=sampling_result.stopReason,
@@ -419,11 +472,11 @@ def configure_semantic_tools(mcp: FastMCP):
                generated_answer=(
                    f"[Sampling request timed out]\n\n"
                    f"The answer generation took too long (>30s). "
-                    f"Found {search_response.total_found} relevant documents. "
+                    f"Found {len(accessible_results)} relevant documents. "
                    f"Please review the sources below or try a simpler query."
                ),
-                sources=search_response.results,
-                total_found=search_response.total_found,
+                sources=accessible_results,
+                total_found=len(accessible_results),
                search_method="semantic_sampling_timeout",
                success=True,
            )
@@ -454,11 +507,11 @@ def configure_semantic_tools(mcp: FastMCP):
                query=query,
                generated_answer=(
                    f"[{user_message}]\n\n"
-                    f"Found {search_response.total_found} relevant documents. "
+                    f"Found {len(accessible_results)} relevant documents. "
                    f"Please review the sources below."
                ),
-                sources=search_response.results,
-                total_found=search_response.total_found,
+                sources=accessible_results,
+                total_found=len(accessible_results),
                search_method=search_method,
                success=True,
            )
@@ -475,17 +528,18 @@ def configure_semantic_tools(mcp: FastMCP):
                query=query,
                generated_answer=(
                    f"[Unexpected error during sampling]\n\n"
-                    f"Found {search_response.total_found} relevant documents. "
+                    f"Found {len(accessible_results)} relevant documents. "
                    f"Please review the sources below."
                ),
-                sources=search_response.results,
-                total_found=search_response.total_found,
+                sources=accessible_results,
+                total_found=len(accessible_results),
                search_method="semantic_sampling_error",
                success=True,
            )

    @mcp.tool()
    @require_scopes("semantic:read")
+    @instrument_tool
    async def nc_get_vector_sync_status(ctx: Context) -> VectorSyncStatusResponse:
        """Get the current vector sync status.

@@ -6,6 +6,7 @@ from mcp.server.fastmcp import Context, FastMCP

 from nextcloud_mcp_server.auth import require_scopes
 from nextcloud_mcp_server.context import get_client
+from nextcloud_mcp_server.observability.metrics import instrument_tool


 def configure_sharing_tools(mcp: FastMCP):
@@ -17,6 +18,7 @@ def configure_sharing_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("sharing:write")
+    @instrument_tool
    async def nc_share_create(
        path: str,
        share_with: str,
@@ -56,6 +58,7 @@ def configure_sharing_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("sharing:write")
+    @instrument_tool
    async def nc_share_delete(share_id: int, ctx: Context) -> str:
        """Delete a share by its ID.

@@ -75,6 +78,7 @@ def configure_sharing_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("sharing:write")
+    @instrument_tool
    async def nc_share_get(share_id: int, ctx: Context) -> str:
        """Get information about a specific share.

@@ -93,6 +97,7 @@ def configure_sharing_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("sharing:write")
+    @instrument_tool
    async def nc_share_list(
        ctx: Context, path: str | None = None, shared_with_me: bool = False
    ) -> str:
@@ -114,6 +119,7 @@ def configure_sharing_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("sharing:write")
+    @instrument_tool
    async def nc_share_update(share_id: int, permissions: int, ctx: Context) -> str:
        """Update the permissions of an existing share.

@@ -4,6 +4,7 @@ from mcp.server.fastmcp import Context, FastMCP

 from nextcloud_mcp_server.auth import require_scopes
 from nextcloud_mcp_server.context import get_client
+from nextcloud_mcp_server.observability.metrics import instrument_tool

 logger = logging.getLogger(__name__)

@@ -12,6 +13,7 @@ def configure_tables_tools(mcp: FastMCP):
    # Tables tools
    @mcp.tool()
    @require_scopes("tables:read")
+    @instrument_tool
    async def nc_tables_list_tables(ctx: Context):
        """List all tables available to the user"""
        client = await get_client(ctx)
@@ -19,6 +21,7 @@ def configure_tables_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("tables:read")
+    @instrument_tool
    async def nc_tables_get_schema(table_id: int, ctx: Context):
        """Get the schema/structure of a specific table including columns and views"""
        client = await get_client(ctx)
@@ -26,6 +29,7 @@ def configure_tables_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("tables:read")
+    @instrument_tool
    async def nc_tables_read_table(
        table_id: int,
        ctx: Context,
@@ -38,6 +42,7 @@ def configure_tables_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("tables:write")
+    @instrument_tool
    async def nc_tables_insert_row(table_id: int, data: dict, ctx: Context):
        """Insert a new row into a table.

@@ -48,6 +53,7 @@ def configure_tables_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("tables:write")
+    @instrument_tool
    async def nc_tables_update_row(row_id: int, data: dict, ctx: Context):
        """Update an existing row in a table.

@@ -58,6 +64,7 @@ def configure_tables_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("tables:write")
+    @instrument_tool
    async def nc_tables_delete_row(row_id: int, ctx: Context):
        """Delete a row from a table"""
        client = await get_client(ctx)
@@ -5,6 +5,7 @@ from mcp.server.fastmcp import Context, FastMCP
 from nextcloud_mcp_server.auth import require_scopes
 from nextcloud_mcp_server.context import get_client
 from nextcloud_mcp_server.models import DirectoryListing, FileInfo, SearchFilesResponse
+from nextcloud_mcp_server.observability.metrics import instrument_tool
 from nextcloud_mcp_server.utils.document_parser import (
    is_parseable_document,
    parse_document,
@@ -17,6 +18,7 @@ def configure_webdav_tools(mcp: FastMCP):
    # WebDAV file system tools
    @mcp.tool()
    @require_scopes("files:read")
+    @instrument_tool
    async def nc_webdav_list_directory(
        ctx: Context, path: str = ""
    ) -> DirectoryListing:
@@ -50,6 +52,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:read")
+    @instrument_tool
    async def nc_webdav_read_file(path: str, ctx: Context):
        """Read the content of a file from NextCloud.

@@ -130,6 +133,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:write")
+    @instrument_tool
    async def nc_webdav_write_file(
        path: str, content: str, ctx: Context, content_type: str | None = None
    ):
@@ -158,6 +162,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:write")
+    @instrument_tool
    async def nc_webdav_create_directory(path: str, ctx: Context):
        """Create a directory in NextCloud.

@@ -172,6 +177,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:write")
+    @instrument_tool
    async def nc_webdav_delete_resource(path: str, ctx: Context):
        """Delete a file or directory in NextCloud.

@@ -186,6 +192,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:write")
+    @instrument_tool
    async def nc_webdav_move_resource(
        source_path: str, destination_path: str, ctx: Context, overwrite: bool = False
    ):
@@ -206,6 +213,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:write")
+    @instrument_tool
    async def nc_webdav_copy_resource(
        source_path: str, destination_path: str, ctx: Context, overwrite: bool = False
    ):
@@ -226,6 +234,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:read")
+    @instrument_tool
    async def nc_webdav_search_files(
        ctx: Context,
        scope: str = "",
@@ -342,6 +351,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:read")
+    @instrument_tool
    async def nc_webdav_find_by_name(
        pattern: str, ctx: Context, scope: str = "", limit: int | None = None
    ) -> SearchFilesResponse:
@@ -369,6 +379,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:read")
+    @instrument_tool
    async def nc_webdav_find_by_type(
        mime_type: str, ctx: Context, scope: str = "", limit: int | None = None
    ) -> SearchFilesResponse:
@@ -396,6 +407,7 @@ def configure_webdav_tools(mcp: FastMCP):

    @mcp.tool()
    @require_scopes("files:read")
+    @instrument_tool
    async def nc_webdav_list_favorites(
        ctx: Context, scope: str = "", limit: int | None = None
    ) -> SearchFilesResponse:
@@ -0,0 +1,140 @@
+"""Custom PCA implementation for dimensionality reduction.
+
+Implements Principal Component Analysis without scikit-learn dependency.
+Used for reducing high-dimensional embeddings (768-dim) to 2D for visualization.
+"""
+
+import logging
+
+import numpy as np
+
+logger = logging.getLogger(__name__)
+
+
+class PCA:
+    """Principal Component Analysis for dimensionality reduction.
+
+    Simple implementation that finds principal components via eigendecomposition
+    of the covariance matrix. Suitable for small-to-medium datasets.
+
+    Attributes:
+        n_components: Number of principal components to keep
+        mean_: Mean of training data (set during fit)
+        components_: Principal components (eigenvectors), one per row,
+            shape (n_components, n_features)
+        explained_variance_: Variance explained by each component
+        explained_variance_ratio_: Fraction of total variance explained
+    """
+
+    def __init__(self, n_components: int = 2):
+        """Initialize PCA.
+
+        Args:
+            n_components: Number of components to keep (default: 2)
+
+        Raises:
+            ValueError: If n_components is less than 1
+        """
+        if n_components < 1:
+            raise ValueError(f"n_components must be >= 1, got {n_components}")
+
+        self.n_components = n_components
+        self.mean_: np.ndarray | None = None
+        self.components_: np.ndarray | None = None
+        self.explained_variance_: np.ndarray | None = None
+        self.explained_variance_ratio_: np.ndarray | None = None
+
+    def fit(self, X: np.ndarray) -> "PCA":
+        """Fit PCA model to data.
+
+        Args:
+            X: Training data of shape (n_samples, n_features)
+
+        Returns:
+            self (for method chaining)
+
+        Raises:
+            ValueError: If X is not 2D, has fewer features than n_components,
+                or has fewer than 2 samples
+        """
+        X = np.asarray(X)
+
+        if X.ndim != 2:
+            raise ValueError(f"X must be 2D array, got shape {X.shape}")
+
+        n_samples, n_features = X.shape
+
+        if n_features < self.n_components:
+            raise ValueError(
+                f"n_components={self.n_components} > n_features={n_features}"
+            )
+
+        # The sample covariance divides by (n_samples - 1); with fewer than two
+        # samples it is undefined and would silently produce NaN components.
+        if n_samples < 2:
+            raise ValueError(f"PCA requires at least 2 samples, got {n_samples}")
+
+        # Center data
+        self.mean_ = np.mean(X, axis=0)
+        X_centered = X - self.mean_
+
+        # Covariance of the features (np.cov normalizes by n_samples - 1).
+        # For a single feature np.cov returns a 0-d array, which eigh rejects,
+        # so force the result to be a (n_features, n_features) matrix.
+        cov = np.atleast_2d(np.cov(X_centered.T))
+
+        # Eigendecomposition (eigh: covariance matrix is symmetric)
+        eigenvalues, eigenvectors = np.linalg.eigh(cov)
+
+        # eigh returns eigenvalues in ascending order; sort descending
+        idx = np.argsort(eigenvalues)[::-1]
+        eigenvalues = eigenvalues[idx]
+        eigenvectors = eigenvectors[:, idx]
+
+        # Keep top n_components (eigenvectors are columns, store them as rows)
+        self.components_ = eigenvectors[:, : self.n_components].T
+        self.explained_variance_ = eigenvalues[: self.n_components]
+
+        # Calculate explained variance ratio; constant data has zero variance
+        total_variance = np.sum(eigenvalues)
+        if total_variance > 0:
+            self.explained_variance_ratio_ = self.explained_variance_ / total_variance
+        else:
+            self.explained_variance_ratio_ = np.zeros(self.n_components)
+
+        logger.debug(
+            f"PCA fit: {n_samples} samples, {n_features} features → "
+            f"{self.n_components} components, "
+            f"explained variance: {self.explained_variance_ratio_}"
+        )
+
+        return self
+
+    def transform(self, X: np.ndarray) -> np.ndarray:
+        """Transform data to principal component space.
+
+        Args:
+            X: Data to transform of shape (n_samples, n_features)
+
+        Returns:
+            Transformed data of shape (n_samples, n_components)
+
+        Raises:
+            ValueError: If PCA not fitted yet, X is not 2D, or X has a
+                different number of features than the training data
+        """
+        if self.mean_ is None or self.components_ is None:
+            raise ValueError("PCA not fitted yet. Call fit() first.")
+
+        X = np.asarray(X)
+
+        if X.ndim != 2:
+            raise ValueError(f"X must be 2D array, got shape {X.shape}")
+
+        # Without this check a single-feature X would broadcast against the
+        # training mean and yield a meaningless projection instead of an error.
+        expected_features = self.mean_.shape[0]
+        if X.shape[1] != expected_features:
+            raise ValueError(
+                f"X has {X.shape[1]} features, but PCA was fitted with "
+                f"{expected_features} features"
+            )
+
+        # Center using training mean
+        X_centered = X - self.mean_
+
+        # Project onto principal components
+        X_transformed = np.dot(X_centered, self.components_.T)
+
+        return X_transformed
+
+    def fit_transform(self, X: np.ndarray) -> np.ndarray:
+        """Fit PCA model and transform data in one step.
+
+        Args:
+            X: Training data of shape (n_samples, n_features)
+
+        Returns:
+            Transformed data of shape (n_samples, n_components)
+
+        Raises:
+            ValueError: Under the same conditions as fit()
+        """
+        self.fit(X)
+        return self.transform(X)
@@ -15,6 +15,11 @@ from qdrant_client.models import FieldCondition, Filter, MatchValue, PointStruct
 from nextcloud_mcp_server.client import NextcloudClient
 from nextcloud_mcp_server.config import get_settings
 from nextcloud_mcp_server.embedding import get_embedding_service
+from nextcloud_mcp_server.observability.metrics import (
+    record_qdrant_operation,
+    record_vector_sync_processing,
+    update_vector_sync_queue_size,
+)
 from nextcloud_mcp_server.observability.tracing import trace_operation
 from nextcloud_mcp_server.vector.document_chunker import DocumentChunker
 from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
@@ -57,11 +62,21 @@ async def processor_task(
            with anyio.fail_after(1.0):
                doc_task = await receive_stream.receive()

+            # Update queue size metric after receiving
+            stream_stats = receive_stream.statistics()
+            update_vector_sync_queue_size(stream_stats.current_buffer_used)
+
            # Process document
            await process_document(doc_task, nc_client)

+            # Update queue size metric after processing
+            stream_stats = receive_stream.statistics()
+            update_vector_sync_queue_size(stream_stats.current_buffer_used)
+
        except TimeoutError:
-            # No documents available, continue
+            # No documents available, update metric to show empty queue
+            stream_stats = receive_stream.statistics()
+            update_vector_sync_queue_size(stream_stats.current_buffer_used)
            continue

        except anyio.EndOfStream:
@@ -90,6 +105,8 @@ async def process_document(doc_task: DocumentTask, nc_client: NextcloudClient):
        doc_task: Document task to process
        nc_client: Authenticated Nextcloud client
    """
+    start_time = time.time()
+
    logger.debug(
        f"Processing {doc_task.doc_type}_{doc_task.doc_id} "
        f"for {doc_task.user_id} ({doc_task.operation})"
@@ -105,58 +122,79 @@ async def process_document(doc_task: DocumentTask, nc_client: NextcloudClient):
            "vector_sync.doc_operation": doc_task.operation,
        },
    ):
-        qdrant_client = await get_qdrant_client()
-        settings = get_settings()
+        try:
+            qdrant_client = await get_qdrant_client()
+            settings = get_settings()

-        # Handle deletion
-        if doc_task.operation == "delete":
-            await qdrant_client.delete(
-                collection_name=settings.get_collection_name(),
-                points_selector=Filter(
-                    must=[
-                        FieldCondition(
-                            key="user_id",
-                            match=MatchValue(value=doc_task.user_id),
-                        ),
-                        FieldCondition(
-                            key="doc_id",
-                            match=MatchValue(value=doc_task.doc_id),
-                        ),
-                        FieldCondition(
-                            key="doc_type",
-                            match=MatchValue(value=doc_task.doc_type),
-                        ),
-                    ]
-                ),
-            )
-            logger.info(
-                f"Deleted {doc_task.doc_type}_{doc_task.doc_id} for {doc_task.user_id}"
-            )
-            return
+            # Handle deletion
+            if doc_task.operation == "delete":
+                await qdrant_client.delete(
+                    collection_name=settings.get_collection_name(),
+                    points_selector=Filter(
+                        must=[
+                            FieldCondition(
+                                key="user_id",
+                                match=MatchValue(value=doc_task.user_id),
+                            ),
+                            FieldCondition(
+                                key="doc_id",
+                                match=MatchValue(value=doc_task.doc_id),
+                            ),
+                            FieldCondition(
+                                key="doc_type",
+                                match=MatchValue(value=doc_task.doc_type),
+                            ),
+                        ]
+                    ),
+                )
+                logger.info(
+                    f"Deleted {doc_task.doc_type}_{doc_task.doc_id} for {doc_task.user_id}"
+                )

-        # Handle indexing with retry
-        max_retries = 3
-        retry_delay = 1.0
+                # Record successful deletion metrics
+                duration = time.time() - start_time
+                record_qdrant_operation("delete", "success")
+                record_vector_sync_processing(duration, "success")
+                return

-        for attempt in range(max_retries):
-            try:
-                await _index_document(doc_task, nc_client, qdrant_client)
-                return  # Success
+            # Handle indexing with retry
+            max_retries = 3
+            retry_delay = 1.0

-            except (HTTPStatusError, Exception) as e:
-                if attempt < max_retries - 1:
-                    logger.warning(
-                        f"Retry {attempt + 1}/{max_retries} for "
-                        f"{doc_task.doc_type}_{doc_task.doc_id}: {e}"
-                    )
-                    await anyio.sleep(retry_delay)
-                    retry_delay *= 2  # Exponential backoff
-                else:
-                    logger.error(
-                        f"Failed to index {doc_task.doc_type}_{doc_task.doc_id} "
-                        f"after {max_retries} retries: {e}"
-                    )
-                    raise
+            for attempt in range(max_retries):
+                try:
+                    await _index_document(doc_task, nc_client, qdrant_client)
+
+                    # Record successful processing metrics
+                    duration = time.time() - start_time
+                    record_qdrant_operation("upsert", "success")
+                    record_vector_sync_processing(duration, "success")
+                    return  # Success
+
+                except (HTTPStatusError, Exception) as e:
+                    if attempt < max_retries - 1:
+                        logger.warning(
+                            f"Retry {attempt + 1}/{max_retries} for "
+                            f"{doc_task.doc_type}_{doc_task.doc_id}: {e}"
+                        )
+                        await anyio.sleep(retry_delay)
+                        retry_delay *= 2  # Exponential backoff
+                    else:
+                        logger.error(
+                            f"Failed to index {doc_task.doc_type}_{doc_task.doc_id} "
+                            f"after {max_retries} retries: {e}"
+                        )
+                        # Only the Qdrant operation metric is recorded here.
+                        # The re-raised exception reaches the outer handler
+                        # below, which records the processing failure; recording
+                        # it here as well would count each failure twice.
+                        record_qdrant_operation("upsert", "error")
+                        raise
+
+        except Exception:
+            # Single place where a failed document is counted: covers errors
+            # raised before indexing starts (client setup, deletion) as well as
+            # indexing that failed after all retries.
+            duration = time.time() - start_time
+            record_vector_sync_processing(duration, "error")
+            raise


 async def _index_document(
@@ -66,10 +66,23 @@ async def get_qdrant_client() -> AsyncQdrantClient:
        from nextcloud_mcp_server.embedding import get_embedding_service

        embedding_service = get_embedding_service()
+
+        # Detect dimension dynamically (for OllamaEmbeddingProvider)
+        if hasattr(embedding_service.provider, "_detect_dimension"):
+            await embedding_service.provider._detect_dimension()  # type: ignore[call-non-callable]
+
        expected_dimension = embedding_service.get_dimension()

-        try:
-            # Get existing collection
+        # Explicitly check if collection exists
+        logger.debug(f"Checking if collection '{collection_name}' exists...")
+        collections = await _qdrant_client.get_collections()
+        collection_names = [c.name for c in collections.collections]
+
+        if collection_name in collection_names:
+            # Collection exists - validate dimensions
+            logger.debug(
+                f"Collection '{collection_name}' found, validating dimensions..."
+            )
            collection_info = await _qdrant_client.get_collection(collection_name)
            actual_dimension = collection_info.config.params.vectors.size

@@ -91,12 +104,12 @@ async def get_qdrant_client() -> AsyncQdrantClient:
                f"(dimension={actual_dimension}, model={settings.ollama_embedding_model})"
            )

-        except Exception as e:
-            # Check if it's a dimension mismatch error (re-raise it)
-            if isinstance(e, ValueError) and "Dimension mismatch" in str(e):
-                raise
-
-            # Collection doesn't exist or other error, create it
+        else:
+            # Collection doesn't exist - create it
+            logger.info(
+                f"Collection '{collection_name}' not found, creating with "
+                f"dimension={expected_dimension}, model={settings.ollama_embedding_model}..."
+            )
            await _qdrant_client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(
@@ -13,6 +13,7 @@ from qdrant_client.models import FieldCondition, Filter, MatchValue

 from nextcloud_mcp_server.client import NextcloudClient
 from nextcloud_mcp_server.config import get_settings
+from nextcloud_mcp_server.observability.metrics import record_vector_sync_scan
 from nextcloud_mcp_server.observability.tracing import trace_operation
 from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client

@@ -181,6 +182,9 @@ async def scan_user_documents(
        ]
        logger.info(f"[SCAN-{scan_id}] Found {len(notes)} notes for {user_id}")

+        # Record documents scanned
+        record_vector_sync_scan(len(notes))
+
        if initial_sync:
            # Send everything on first sync
            for note in notes:
@@ -1,6 +1,6 @@
 [project]
 name = "nextcloud-mcp-server"
-version = "0.31.1"
+version = "0.36.0"
 description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
 authors = [
    {name = "Chris Coutinho", email = "chris@coutinho.io"}
@@ -1,307 +0,0 @@
-#!/usr/bin/env python3
-"""Script to automatically add @require_scopes decorators to MCP tools.
-
-This script parses server module files and adds appropriate scope decorators
-based on the operation type (read vs write).
-
-Usage:
-    python scripts/add_scope_decorators.py [--dry-run] [--file FILE]
-"""
-
-import argparse
-import ast
-import re
-from pathlib import Path
-from typing import List, Tuple
-
-# Operation patterns for classification
-READ_PATTERNS = [
-    r".*_get_.*",
-    r".*_get$",
-    r".*_list_.*",
-    r".*_list$",
-    r".*_search_.*",
-    r".*_search$",
-    r".*_read_.*",
-    r".*_read$",
-    r".*_find_.*",
-    r".*_find$",
-    r".*_fetch_.*",
-    r".*_fetch$",
-    r".*_retrieve_.*",
-    r".*_retrieve$",
-]
-
-WRITE_PATTERNS = [
-    r".*_create_.*",
-    r".*_create$",
-    r".*_update_.*",
-    r".*_update$",
-    r".*_delete_.*",
-    r".*_delete$",
-    r".*_append_.*",
-    r".*_append$",
-    r".*_modify_.*",
-    r".*_modify$",
-    r".*_set_.*",
-    r".*_set$",
-    r".*_add_.*",
-    r".*_add$",
-    r".*_remove_.*",
-    r".*_remove$",
-    r".*_edit_.*",
-    r".*_edit$",
-    r".*_move_.*",
-    r".*_move$",
-    r".*_copy_.*",
-    r".*_copy$",
-    r".*_upload_.*",
-    r".*_upload$",
-    r".*_download_.*",
-    r".*_download$",
-    r".*_share_.*",
-    r".*_share$",
-    r".*_unshare_.*",
-    r".*_unshare$",
-    r".*_bulk_.*",  # Bulk operations are typically writes
-]
-
-
-def classify_operation(func_name: str) -> str | None:
-    """Classify a function as read or write operation.
-
-    Args:
-        func_name: Function name to classify
-
-    Returns:
-        "nc:read", "nc:write", or None if cannot classify
-    """
-    # Check write patterns first (more specific)
-    for pattern in WRITE_PATTERNS:
-        if re.match(pattern, func_name):
-            return "nc:write"
-
-    # Check read patterns
-    for pattern in READ_PATTERNS:
-        if re.match(pattern, func_name):
-            return "nc:read"
-
-    return None
-
-
-def has_scope_decorator(decorators: List[ast.expr]) -> bool:
-    """Check if function already has @require_scopes decorator."""
-    for decorator in decorators:
-        if isinstance(decorator, ast.Call):
-            if (
-                isinstance(decorator.func, ast.Name)
-                and decorator.func.id == "require_scopes"
-            ):
-                return True
-        elif isinstance(decorator, ast.Name) and decorator.name == "require_scopes":
-            return True
-    return False
-
-
-def has_mcp_tool_decorator(decorators: List[ast.expr]) -> bool:
-    """Check if function has @mcp.tool() decorator."""
-    for decorator in decorators:
-        if isinstance(decorator, ast.Call):
-            if isinstance(decorator.func, ast.Attribute):
-                if decorator.func.attr == "tool":
-                    return True
-    return False
-
-
-def find_tools_needing_decorators(
-    file_path: Path, verbose: bool = False
-) -> List[Tuple[str, int, str]]:
-    """Find all tools that need scope decorators.
-
-    Returns:
-        List of (function_name, line_number, required_scope)
-    """
-    with open(file_path) as f:
-        content = f.read()
-
-    try:
-        tree = ast.parse(content)
-    except SyntaxError as e:
-        print(f"  ⚠️  Syntax error in {file_path}: {e}")
-        return []
-
-    tools_to_update = []
-    total_functions = 0
-    mcp_tools = 0
-    already_has_scope = 0
-    cannot_classify = 0
-
-    for node in ast.walk(tree):
-        if isinstance(node, ast.FunctionDef):
-            total_functions += 1
-
-            if verbose and node.decorator_list:
-                decorators_str = [
-                    ast.unparse(d) if hasattr(ast, "unparse") else str(d)
-                    for d in node.decorator_list
-                ]
-                print(f"  Function {node.name} has decorators: {decorators_str}")
-
-            # Check if it's an MCP tool
-            if not has_mcp_tool_decorator(node.decorator_list):
-                continue
-
-            mcp_tools += 1
-
-            # Check if it already has scope decorator
-            if has_scope_decorator(node.decorator_list):
-                already_has_scope += 1
-                continue
-
-            # Classify operation
-            scope = classify_operation(node.name)
-            if scope:
-                tools_to_update.append((node.name, node.lineno, scope))
-            else:
-                cannot_classify += 1
-                if verbose:
-                    print(f"  ⚠️  Cannot classify: {node.name}")
-
-    if verbose:
-        print(
-            f"  Debug: total_functions={total_functions}, mcp_tools={mcp_tools}, already_has_scope={already_has_scope}, cannot_classify={cannot_classify}"
-        )
-
-    return tools_to_update
-
-
-def add_decorator_to_file(
-    file_path: Path, dry_run: bool = False, verbose: bool = False
-) -> int:
-    """Add @require_scopes decorators to tools in a file.
-
-    Returns:
-        Number of decorators added
-    """
-    tools = find_tools_needing_decorators(file_path, verbose=verbose)
-
-    if not tools:
-        return 0
-
-    print(f"\n📝 {file_path.relative_to(Path.cwd())}")
-
-    with open(file_path) as f:
-        lines = f.readlines()
-
-    # Check if require_scopes is already imported
-    has_import = False
-    import_line_idx = None
-    for i, line in enumerate(lines):
-        if "from nextcloud_mcp_server.auth import" in line and "require_scopes" in line:
-            has_import = True
-            break
-        elif "from nextcloud_mcp_server.auth import" in line:
-            import_line_idx = i
-
-    # Add import if needed
-    if not has_import:
-        if import_line_idx is not None:
-            # Add require_scopes to existing import
-            old_line = lines[import_line_idx]
-            if "(" in old_line:
-                # Multi-line import
-                print(
-                    "  ⚠️  Multi-line import detected, please add manually: from nextcloud_mcp_server.auth import require_scopes"
-                )
-            else:
-                # Single line import - add require_scopes
-                lines[import_line_idx] = (
-                    old_line.rstrip().rstrip(")").rstrip() + ", require_scopes)\n"
-                )
-                print("  ✓ Added require_scopes to import")
-        else:
-            # No auth import exists, add new import
-            # Find first import line
-            for i, line in enumerate(lines):
-                if line.startswith("from nextcloud_mcp_server"):
-                    lines.insert(
-                        i, "from nextcloud_mcp_server.auth import require_scopes\n"
-                    )
-                    print(
-                        "  ✓ Added import: from nextcloud_mcp_server.auth import require_scopes"
-                    )
-                    break
-
-    # Add decorators to tools (in reverse order to preserve line numbers)
-    for func_name, line_num, scope in reversed(tools):
-        # Find the @mcp.tool() decorator line
-        for i in range(line_num - 1, max(0, line_num - 10), -1):
-            if "@mcp.tool()" in lines[i]:
-                # Get indentation from @mcp.tool() line
-                indent = len(lines[i]) - len(lines[i].lstrip())
-                decorator_line = " " * indent + f'@require_scopes("{scope}")\n'
-                lines.insert(i + 1, decorator_line)
-                print(f'  ✓ {func_name}:{line_num} → @require_scopes("{scope}")')
-                break
-
-    if not dry_run:
-        with open(file_path, "w") as f:
-            f.writelines(lines)
-        print("  💾 Saved changes")
-    else:
-        print("  🔍 DRY RUN - no changes written")
-
-    return len(tools)
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Add @require_scopes decorators to MCP tools"
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="Show what would be changed without modifying files",
-    )
-    parser.add_argument(
-        "--file",
-        type=Path,
-        help="Process a single file instead of all server modules",
-    )
-    parser.add_argument(
-        "--verbose",
-        "-v",
-        action="store_true",
-        help="Show debug information",
-    )
-    args = parser.parse_args()
-
-    server_dir = Path(__file__).parent.parent / "nextcloud_mcp_server" / "server"
-
-    if args.file:
-        files = [args.file]
-    else:
-        files = sorted(server_dir.glob("*.py"))
-        files = [f for f in files if f.name != "__init__.py"]
-
-    print("🔍 Scanning for tools needing scope decorators...")
-    print(
-        f"   {'DRY RUN MODE - No changes will be made' if args.dry_run else 'LIVE MODE - Files will be modified'}"
-    )
-
-    total_added = 0
-    for file_path in files:
-        added = add_decorator_to_file(
-            file_path, dry_run=args.dry_run, verbose=args.verbose
-        )
-        total_added += added
-
-    print(f"\n{'📊 Summary (dry run)' if args.dry_run else '✅ Complete'}")
-    print(f"   Total decorators added: {total_added}")
-
-    if args.dry_run:
-        print("\n💡 Run without --dry-run to apply changes")
-
-
-if __name__ == "__main__":
-    main()
@@ -1,232 +0,0 @@
-#!/usr/bin/env python3
-"""Simpler script to add @require_scopes decorators using regex.
-
-This script uses regex patterns to find @mcp.tool() decorators and adds
-the appropriate @require_scopes decorator based on function name patterns.
-
-Usage:
-    python scripts/add_scope_decorators_simple.py [--dry-run]
-"""
-
-import argparse
-import re
-from pathlib import Path
-
-# Operation patterns for classification
-READ_KEYWORDS = [
-    "get",
-    "list",
-    "search",
-    "read",
-    "find",
-    "fetch",
-    "retrieve",
-    "upcoming",
-]
-WRITE_KEYWORDS = [
-    "create",
-    "update",
-    "delete",
-    "append",
-    "modify",
-    "set",
-    "add",
-    "remove",
-    "edit",
-    "move",
-    "copy",
-    "upload",
-    "download",
-    "share",
-    "unshare",
-    "bulk",
-    "manage",
-    "import",
-    "reindex",
-    "archive",
-    "unarchive",
-    "reorder",
-    "assign",
-    "unassign",
-    "insert",
-    "write",
-]
-
-
-def classify_function(func_name: str) -> str | None:
-    """Classify a function name as read or write operation."""
-    func_lower = func_name.lower()
-
-    # Check write keywords first (more specific)
-    for keyword in WRITE_KEYWORDS:
-        if f"_{keyword}_" in func_lower or func_lower.endswith(f"_{keyword}"):
-            return "nc:write"
-
-    # Check read keywords
-    for keyword in READ_KEYWORDS:
-        if f"_{keyword}_" in func_lower or func_lower.endswith(f"_{keyword}"):
-            return "nc:read"
-
-    return None
-
-
-def process_file(file_path: Path, dry_run: bool = False) -> int:
-    """Process a single file to add @require_scopes decorators.
-
-    Returns:
-        Number of decorators added
-    """
-    with open(file_path) as f:
-        lines = f.readlines()
-
-    # Check if require_scopes is already imported
-    has_import = False
-    import_line_idx = None
-
-    for i, line in enumerate(lines):
-        if "from nextcloud_mcp_server.auth import" in line:
-            if "require_scopes" in line:
-                has_import = True
-            else:
-                import_line_idx = i
-
-    modified = False
-    decorators_added = 0
-
-    # Find all @mcp.tool() decorators
-    i = 0
-    while i < len(lines):
-        line = lines[i]
-
-        # Look for @mcp.tool() decorator
-        if re.match(r"\s*@mcp\.tool\(\)", line):
-            # Check if next line already has @require_scopes
-            if i + 1 < len(lines) and "@require_scopes" in lines[i + 1]:
-                i += 1
-                continue
-
-            # Find the function definition (should be on next line or after other decorators)
-            func_line_idx = i + 1
-            while func_line_idx < len(lines) and not lines[
-                func_line_idx
-            ].strip().startswith("async def"):
-                func_line_idx += 1
-
-            if func_line_idx >= len(lines):
-                i += 1
-                continue
-
-            # Extract function name
-            func_match = re.match(r"\s*async def (\w+)\(", lines[func_line_idx])
-            if not func_match:
-                i += 1
-                continue
-
-            func_name = func_match.group(1)
-            scope = classify_function(func_name)
-
-            if scope:
-                # Get indentation from @mcp.tool() line
-                indent = len(line) - len(line.lstrip())
-                decorator_line = " " * indent + f'@require_scopes("{scope}")\n'
-
-                # Insert after @mcp.tool()
-                lines.insert(i + 1, decorator_line)
-                decorators_added += 1
-                modified = True
-                print(f'  ✓ {func_name} → @require_scopes("{scope}")')
-            else:
-                print(f"  ⚠️  Cannot classify: {func_name}")
-
-        i += 1
-
-    # Add import if needed and decorators were added
-    if decorators_added > 0 and not has_import:
-        if import_line_idx is not None:
-            # Add to existing import
-            old_line = lines[import_line_idx]
-            if old_line.rstrip().endswith(")"):
-                lines[import_line_idx] = old_line.rstrip()[:-1] + ", require_scopes)\n"
-            else:
-                lines[import_line_idx] = old_line.rstrip() + ", require_scopes\n"
-            print("  ✓ Added require_scopes to existing import")
-            modified = True
-        else:
-            # No auth import exists, add new import after last 'from nextcloud_mcp_server' import
-            last_nc_import_idx = None
-            for i, line in enumerate(lines):
-                if line.startswith("from nextcloud_mcp_server"):
-                    last_nc_import_idx = i
-
-            if last_nc_import_idx is not None:
-                lines.insert(
-                    last_nc_import_idx + 1,
-                    "from nextcloud_mcp_server.auth import require_scopes\n",
-                )
-                print(
-                    "  ✓ Added new import: from nextcloud_mcp_server.auth import require_scopes"
-                )
-                modified = True
-            else:
-                print("  ⚠️  Could not find place to add require_scopes import")
-
-    # Write changes
-    if modified and not dry_run:
-        with open(file_path, "w") as f:
-            f.writelines(lines)
-        print(f"  💾 Saved changes to {file_path.name}")
-    elif dry_run and decorators_added > 0:
-        print(f"  🔍 DRY RUN - would add {decorators_added} decorators")
-
-    return decorators_added
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Add @require_scopes decorators to MCP tools"
-    )
-    parser.add_argument(
-        "--dry-run",
-        action="store_true",
-        help="Show what would be changed without modifying files",
-    )
-    parser.add_argument(
-        "--file",
-        type=Path,
-        help="Process a single file instead of all server modules",
-    )
-    args = parser.parse_args()
-
-    server_dir = Path(__file__).parent.parent / "nextcloud_mcp_server" / "server"
-
-    if args.file:
-        files = [args.file]
-    else:
-        files = sorted(server_dir.glob("*.py"))
-        files = [f for f in files if f.name != "__init__.py"]
-
-    print("🔍 Scanning for tools needing scope decorators...")
-    print(
-        f"   {'DRY RUN MODE - No changes will be made' if args.dry_run else 'LIVE MODE - Files will be modified'}"
-    )
-
-    total_added = 0
-    for file_path in files:
-        file_path = file_path.resolve()  # Convert to absolute path
-        try:
-            display_path = file_path.relative_to(Path.cwd())
-        except ValueError:
-            display_path = file_path.name
-        print(f"\n📝 {display_path}")
-        added = process_file(file_path, dry_run=args.dry_run)
-        total_added += added
-
-    print(f"\n{'📊 Summary (dry run)' if args.dry_run else '✅ Complete'}")
-    print(f"   Total decorators added: {total_added}")
-
-    if args.dry_run and total_added > 0:
-        print("\n💡 Run without --dry-run to apply changes")
-
-
-if __name__ == "__main__":
-    main()
@@ -1,90 +0,0 @@
-#!/bin/bash
-set -e
-
-echo "=== Testing Separate Clients Architecture ==="
-echo ""
-
-# Check both clients exist in Keycloak
-echo "1. Verifying Keycloak clients..."
-docker compose exec -T app curl -s http://keycloak:8080/realms/nextcloud-mcp/.well-known/openid-configuration > /dev/null && echo "✓ Keycloak realm available"
-
-# Check user_oidc provider configuration
-echo ""
-echo "2. Checking user_oidc provider..."
-PROVIDER_INFO=$(docker compose exec -T app php occ user_oidc:provider keycloak)
-echo "$PROVIDER_INFO" | grep -q "nextcloud" && echo "✓ user_oidc configured with 'nextcloud' client"
-
-# Get token from nextcloud-mcp-server client
-echo ""
-echo "3. Getting token from 'nextcloud-mcp-server' client..."
-TOKEN=$(curl -s -X POST "http://localhost:8888/realms/nextcloud-mcp/protocol/openid-connect/token" \
-  -d "grant_type=password" \
-  -d "client_id=nextcloud-mcp-server" \
-  -d "client_secret=mcp-secret-change-in-production" \
-  -d "username=admin" \
-  -d "password=admin" \
-  -d "scope=openid profile email offline_access" | jq -r '.access_token')
-
-if [ "$TOKEN" = "null" ] || [ -z "$TOKEN" ]; then
-    echo "✗ Failed to get token"
-    exit 1
-fi
-
-echo "✓ Got token from nextcloud-mcp-server client"
-
-# Check token claims
-echo ""
-echo "4. Inspecting token claims..."
-CLAIMS=$(echo "$TOKEN" | cut -d'.' -f2 | base64 -d 2>/dev/null | jq '{aud, azp, iss, preferred_username}')
-echo "$CLAIMS"
-
-AUD=$(echo "$CLAIMS" | jq -r '.aud')
-AZP=$(echo "$CLAIMS" | jq -r '.azp')
-
-echo ""
-echo "Architecture validation:"
-if [ "$AUD" = "nextcloud" ]; then
-    echo "  ✓ aud='nextcloud' - Token intended for Nextcloud resource server"
-else
-    echo "  ✗ FAILED: aud='$AUD', expected 'nextcloud'"
-    exit 1
-fi
-
-if [ "$AZP" = "nextcloud-mcp-server" ]; then
-    echo "  ✓ azp='nextcloud-mcp-server' - Token requested by MCP Server client"
-else
-    echo "  ✗ FAILED: azp='$AZP', expected 'nextcloud-mcp-server'"
-    exit 1
-fi
-
-# Test with Nextcloud API
-echo ""
-echo "5. Testing token with Nextcloud API..."
-HTTP_CODE=$(curl -s -w "%{http_code}" -o /tmp/nc_response.json \
-    -H "Authorization: Bearer $TOKEN" \
-    "http://localhost:8080/ocs/v2.php/cloud/capabilities?format=json")
-
-echo "HTTP Status: $HTTP_CODE"
-
-if [ "$HTTP_CODE" = "200" ]; then
-    echo "✓ Token validated successfully!"
-    echo ""
-    echo "===================================================================="
-    echo "SUCCESS: Separate Clients Architecture Working!"
-    echo "===================================================================="
-    echo ""
-    echo "Summary:"
-    echo "  - MCP Server client: 'nextcloud-mcp-server' (requests tokens)"
-    echo "  - Resource server: 'nextcloud' (validates tokens via user_oidc)"
-    echo "  - Token audience: 'nextcloud' (proper resource targeting)"
-    echo "  - Token azp: 'nextcloud-mcp-server' (identifies requester)"
-    echo ""
-    echo "This architecture supports:"
-    echo "  - Future multi-resource tokens: aud=['nextcloud', 'other-service']"
-    echo "  - Clear separation of OAuth client vs resource server"
-    echo "  - RFC 8707 Resource Indicators compliance"
-else
-    echo "✗ Token validation failed"
-    cat /tmp/nc_response.json
-    exit 1
-fi
@@ -9,6 +9,7 @@ import pytest
 from httpx import HTTPStatusError
 from mcp import ClientSession
 from mcp.client.session import RequestContext
+from mcp.client.sse import sse_client
 from mcp.client.streamable_http import streamablehttp_client
 from mcp.types import ElicitRequestParams, ElicitResult, ErrorData

@@ -165,6 +166,51 @@ async def create_mcp_client_session(
    logger.debug(f"{client_name} client session cleaned up successfully")


+async def create_mcp_client_session_sse(
+    url: str,
+    token: str | None = None,
+    client_name: str = "MCP",
+    elicitation_callback: Any = None,
+) -> AsyncGenerator[ClientSession, Any]:
+    """
+    Open an MCP client session over the SSE transport and yield it.
+
+    SSE counterpart of create_mcp_client_session. Transport and session share
+    one ``async with``, so they exit in LIFO order (session, then transport).
+
+    Args:
+        url: MCP server URL (e.g., "http://localhost:8000/sse")
+        token: Optional OAuth access token, sent as a Bearer Authorization header
+        client_name: Label used in log messages (e.g., "Basic MCP (SSE)")
+        elicitation_callback: Optional callback passed through to ClientSession
+
+    Yields:
+        MCP ClientSession on which initialize() has already been awaited
+
+    Note:
+        SSE transport is being deprecated in favor of streamable-http.
+        This function exists for compatibility testing only.
+    """
+    logger.info(f"Creating SSE client for {client_name}")
+
+    headers = None
+    if token:
+        headers = {"Authorization": f"Bearer {token}"}
+
+    # sse_client yields two streams, not three like streamablehttp_client
+    async with (
+        sse_client(url, headers=headers) as (reader, writer),
+        ClientSession(
+            reader, writer, elicitation_callback=elicitation_callback
+        ) as session,
+    ):
+        await session.initialize()
+        logger.info(f"{client_name} client session initialized successfully")
+        yield session
+
+    logger.debug(f"{client_name} client session cleaned up successfully")
+
+
@pytest.fixture(scope="session")
 async def nc_client(anyio_backend) -> AsyncGenerator[NextcloudClient, Any]:
    """
@@ -203,12 +249,14 @@ async def nc_client(anyio_backend) -> AsyncGenerator[NextcloudClient, Any]:
@pytest.fixture(scope="session")
 async def nc_mcp_client(anyio_backend) -> AsyncGenerator[ClientSession, Any]:
    """
-    Fixture to create an MCP client session for integration tests using streamable-http.
+    Fixture to create an MCP client session for integration tests using SSE transport.

    Uses anyio pytest plugin for proper async fixture handling.
+
+    Note: SSE transport is being deprecated. This fixture uses SSE for compatibility testing.
    """
-    async for session in create_mcp_client_session(
-        url="http://localhost:8000/mcp", client_name="Basic MCP"
+    async for session in create_mcp_client_session_sse(
+        url="http://localhost:8000/sse", client_name="Basic MCP (SSE)"
    ):
        yield session

@@ -0,0 +1,322 @@
+"""Integration tests for Qdrant collection auto-creation.
+
+These tests validate that:
+1. Collections are automatically created on first access
+2. Dimension validation detects mismatches
+3. Idempotent initialization (multiple calls don't fail)
+4. Proper error handling and logging
+"""
+
+from unittest.mock import Mock
+
+import pytest
+
+from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
+
+pytestmark = pytest.mark.integration
+
+
+@pytest.fixture(autouse=True)
+async def reset_singleton():
+    """Give each test a fresh Qdrant client singleton, then restore the old one."""
+    # The singleton lives on the qdrant_client module, so it is patched there.
+    import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
+
+    # Remember whatever client was cached before this test
+    original = qdrant_module._qdrant_client
+
+    # Clear the cache so get_qdrant_client() has to initialize again
+    qdrant_module._qdrant_client = None
+
+    yield
+
+    # Put the pre-test client back
+    qdrant_module._qdrant_client = original
+
+
+@pytest.mark.integration
+async def test_collection_auto_created_on_first_access(monkeypatch):
+    """A missing collection is created on first access with the right dimension."""
+    from nextcloud_mcp_server.config import Settings
+    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
+
+    expected_dim = 384
+
+    # In-memory Qdrant; background sync is switched off for the test
+    settings_stub = Settings(
+        qdrant_location=":memory:",
+        ollama_embedding_model="nomic-embed-text",
+        vector_sync_enabled=False,
+    )
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings_stub
+    )
+
+    # Stand-in embedding service exposing .provider and .get_dimension()
+    provider = SimpleEmbeddingProvider(dimension=expected_dim)
+    embedding_stub = Mock(provider=provider, get_dimension=provider.get_dimension)
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.embedding.get_embedding_service",
+        lambda: embedding_stub,
+    )
+
+    # First access is expected to create the collection
+    client = await get_qdrant_client()
+
+    # A client object must come back
+    assert client is not None
+
+    # The collection named by the settings must now be listed
+    wanted = settings_stub.get_collection_name()
+    listing = await client.get_collections()
+    existing = {entry.name for entry in listing.collections}
+    assert wanted in existing
+
+    # Its vector size must match the dimension the provider was built with
+    info = await client.get_collection(wanted)
+    vector_size = info.config.params.vectors.size
+    assert vector_size == expected_dim
+
+
+@pytest.mark.integration
+async def test_existing_collection_reused(monkeypatch):
+    """A second initialization finds the collection and leaves it as it was."""
+    import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
+    from nextcloud_mcp_server.config import Settings
+    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
+
+    expected_dim = 384
+
+    # In-memory Qdrant with background sync disabled
+    settings_stub = Settings(
+        qdrant_location=":memory:",
+        ollama_embedding_model="nomic-embed-text",
+        vector_sync_enabled=False,
+    )
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings_stub
+    )
+
+    # Stand-in embedding service exposing .provider and .get_dimension()
+    provider = SimpleEmbeddingProvider(dimension=expected_dim)
+    embedding_stub = Mock(provider=provider, get_dimension=provider.get_dimension)
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.embedding.get_embedding_service",
+        lambda: embedding_stub,
+    )
+
+    # First initialization creates the collection; the client itself is unused
+    await get_qdrant_client()
+    wanted = settings_stub.get_collection_name()
+
+    # Drop the cached client so the next call initializes again.
+    # NOTE(review): the dimension-mismatch test switches to an on-disk path so
+    # the collection "survives client reset"; with ":memory:" this second call
+    # may be creating the collection afresh rather than reusing it - confirm.
+    qdrant_module._qdrant_client = None
+
+    # Second initialization must succeed
+    second_client = await get_qdrant_client()
+    assert second_client is not None
+
+    # The collection is present after the second initialization
+    listing = await second_client.get_collections()
+    existing = {entry.name for entry in listing.collections}
+    assert wanted in existing
+
+    # The vector size is still the original one
+    info = await second_client.get_collection(wanted)
+    vector_size = info.config.params.vectors.size
+    assert vector_size == expected_dim
+
+
+@pytest.mark.integration
+async def test_dimension_mismatch_detected(monkeypatch, tmp_path):
+    """Re-initializing with a different embedding dimension raises ValueError."""
+    import nextcloud_mcp_server.vector.qdrant_client as qdrant_module
+    from nextcloud_mcp_server.config import Settings
+    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
+
+    # Dimension used to create the collection vs. the one used afterwards
+    old_dim = 384
+    new_dim = 768
+
+    def install_embedding_service(dimension):
+        """Patch get_embedding_service to return a stub of the given dimension.
+
+        The stub exposes the .provider attribute and get_dimension() callable.
+        """
+        provider = SimpleEmbeddingProvider(dimension=dimension)
+        stub = Mock(provider=provider, get_dimension=provider.get_dimension)
+        monkeypatch.setattr(
+            "nextcloud_mcp_server.embedding.get_embedding_service",
+            lambda: stub,
+        )
+
+    # On-disk storage under tmp_path, so the collection outlives the first client
+    storage_dir = str(tmp_path / "qdrant_data")
+    settings_stub = Settings(
+        qdrant_location=storage_dir,
+        ollama_embedding_model="nomic-embed-text",
+        vector_sync_enabled=False,
+    )
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings_stub
+    )
+
+    # Phase 1: the embedding service reports old_dim
+    install_embedding_service(old_dim)
+
+    # First initialization creates the collection
+    first_client = await get_qdrant_client()
+    wanted = settings_stub.get_collection_name()
+
+    # Sanity check: it was created with old_dim
+    info = await first_client.get_collection(wanted)
+    assert info.config.params.vectors.size == old_dim
+
+    # Release the file lock held by the first client
+    await first_client.close()
+
+    # Drop the cached singleton; the data under tmp_path stays in place
+    qdrant_module._qdrant_client = None
+
+    # Phase 2: same storage, but the embedding service now reports new_dim
+    install_embedding_service(new_dim)
+
+    # Initialization must refuse to proceed
+    with pytest.raises(ValueError) as exc_info:
+        await get_qdrant_client()
+
+    # The message must name the problem, both dimensions, and list solutions
+    message = str(exc_info.value)
+    assert "Dimension mismatch" in message
+    assert "384" in message  # dimension the collection was created with
+    assert "768" in message  # dimension now reported by the provider
+    assert "Solutions:" in message
+
+
+@pytest.mark.integration
+async def test_idempotent_initialization(monkeypatch):
+    """Repeated get_qdrant_client() calls hand back one and the same client."""
+    from nextcloud_mcp_server.config import Settings
+    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
+
+    dimension = 384
+
+    # In-memory Qdrant with background sync disabled
+    settings_stub = Settings(
+        qdrant_location=":memory:",
+        ollama_embedding_model="nomic-embed-text",
+        vector_sync_enabled=False,
+    )
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings_stub
+    )
+
+    # Stand-in embedding service exposing .provider and .get_dimension()
+    provider = SimpleEmbeddingProvider(dimension=dimension)
+    embedding_stub = Mock(provider=provider, get_dimension=provider.get_dimension)
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.embedding.get_embedding_service",
+        lambda: embedding_stub,
+    )
+
+    # Three consecutive calls
+    first = await get_qdrant_client()
+    second = await get_qdrant_client()
+    third = await get_qdrant_client()
+
+    # Identity, not just equality:
+    # every call must return the very same object
+    assert first is second
+    assert second is third
+
+    # The collection named by the settings exists on that client
+    wanted = settings_stub.get_collection_name()
+    listing = await first.get_collections()
+    existing = {entry.name for entry in listing.collections}
+    assert wanted in existing
+
+
+@pytest.mark.integration
+async def test_collection_name_generation(monkeypatch):
+    """The generated collection name reflects the deployment ID or the model."""
+    from nextcloud_mcp_server.config import Settings
+    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
+
+    deployment_id = "test-deployment"
+    model_name = "test-model"
+
+    # In-memory Qdrant, a recognisable model name, background sync disabled
+    settings_stub = Settings(
+        qdrant_location=":memory:",
+        ollama_embedding_model=model_name,
+        vector_sync_enabled=False,
+    )
+
+    # The deployment ID is supplied through the environment.
+    # NOTE(review): set after Settings() is built - confirm it is picked up.
+    monkeypatch.setenv("MCP_DEPLOYMENT_ID", deployment_id)
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings_stub
+    )
+
+    # Stand-in embedding service exposing .provider and .get_dimension()
+    provider = SimpleEmbeddingProvider(dimension=384)
+    embedding_stub = Mock(provider=provider, get_dimension=provider.get_dimension)
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.embedding.get_embedding_service",
+        lambda: embedding_stub,
+    )
+
+    # Initializing the client creates the collection
+    client = await get_qdrant_client()
+
+    # Passes when either marker is present in the name; both are not required
+    wanted = settings_stub.get_collection_name()
+    assert any(marker in wanted for marker in (deployment_id, model_name))
+
+    # A collection with exactly that name must exist
+    listing = await client.get_collections()
+    existing = {entry.name for entry in listing.collections}
+    assert wanted in existing
+
+
+@pytest.mark.integration
+async def test_collection_uses_cosine_distance(monkeypatch):
+    """The auto-created collection is configured with Distance.COSINE."""
+    from qdrant_client.models import Distance
+
+    from nextcloud_mcp_server.config import Settings
+    from nextcloud_mcp_server.embedding import SimpleEmbeddingProvider
+
+    # In-memory Qdrant with background sync disabled
+    settings_stub = Settings(
+        qdrant_location=":memory:",
+        ollama_embedding_model="nomic-embed-text",
+        vector_sync_enabled=False,
+    )
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.vector.qdrant_client.get_settings", lambda: settings_stub
+    )
+
+    # Stand-in embedding service exposing .provider and .get_dimension()
+    provider = SimpleEmbeddingProvider(dimension=384)
+    embedding_stub = Mock(provider=provider, get_dimension=provider.get_dimension)
+    monkeypatch.setattr(
+        "nextcloud_mcp_server.embedding.get_embedding_service",
+        lambda: embedding_stub,
+    )
+
+    # Initializing the client creates the collection
+    client = await get_qdrant_client()
+
+    # Read back the configuration of the collection named by the settings
+    wanted = settings_stub.get_collection_name()
+    info = await client.get_collection(wanted)
+
+    # The configured distance metric must be cosine
+    metric = info.config.params.vectors.distance
+    expected_metric = Distance.COSINE
+    assert metric == expected_metric
@@ -1053,7 +1053,7 @@ wheels = [

 [[package]]
 name = "nextcloud-mcp-server"
-version = "0.31.1"
+version = "0.36.0"
 source = { editable = "." }
 dependencies = [
    { name = "aiosqlite" },