Compare commits
88 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4a3b80cb98 | |||
| fc3ab8d0ac | |||
| 0f03541486 | |||
| ef07b1a6c9 | |||
| 4f82357f24 | |||
| c4293b6750 | |||
| 72e4eb3d19 | |||
| 47dd2df7aa | |||
| 9fd2022151 | |||
| b99dc52c95 | |||
| 78b27fb5e9 | |||
| 03e39a3f94 | |||
| 5259658458 | |||
| e03a3c2e83 | |||
| 94cbd3015d | |||
| 49a961cbcc | |||
| e1aca04aff | |||
| 3b12e585ca | |||
| e647c87dd8 | |||
| cb74157d51 | |||
| 202058bdc8 | |||
| c312911538 | |||
| e602684743 | |||
| 8221046d8a | |||
| 3e45b6ca25 | |||
| 9ec7637579 | |||
| 670188f9e4 | |||
| 3878beaf65 | |||
| a5a0571bde | |||
| 0e7e74867f | |||
| a29045cca4 | |||
| b11c3ddfb6 | |||
| 562c102711 | |||
| 3c3646bec2 | |||
| dd636e6a08 | |||
| d7a8719d0e | |||
| 97fa9ef8a7 | |||
| 77dd17b3e1 | |||
| d56ec33b77 | |||
| a1c5acc1c2 | |||
| e0de2e17e9 | |||
| 4fc0cb5a41 | |||
| ff9cca716b | |||
| ef4a82e589 | |||
| 301c502e57 | |||
| d4d291d6d2 | |||
| e4b0ea5093 | |||
| 6833f7f117 | |||
| 7db2a5c586 | |||
| b76c10f18c | |||
| ab7411d9fd | |||
| d02fe3c3b6 | |||
| 49f9cead69 | |||
| 415b1c901b | |||
| 90b96a8afe | |||
| 57a2157c58 | |||
| bfdc33c390 | |||
| 8844c07ecb | |||
| 0a0ef10989 | |||
| 9414d9c9c3 | |||
| 8a52df4a8e | |||
| a36038422b | |||
| 2147fc1696 | |||
| a19017c686 | |||
| f0e5333e43 | |||
| 553e84e5f2 | |||
| ff20031601 | |||
| 04e0ab127a | |||
| 1117a83a52 | |||
| 50a824155c | |||
| 0df9e41332 | |||
| 3baf10662f | |||
| bfbaed9a66 | |||
| ff32149220 | |||
| db79afacb9 | |||
| 6730dd4a4b | |||
| 8734c4b292 | |||
| 29df645d53 | |||
| 8942f3119c | |||
| 3863cca2ed | |||
| 98627593d5 | |||
| 64649c902d | |||
| 3ff6346c03 | |||
| c9a687171a | |||
| df5f85e0c6 | |||
| 76dce41ed9 | |||
| 642108ee91 | |||
| ce5724f05e |
@@ -0,0 +1,122 @@
|
||||
name: Release Charts
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- v*
|
||||
|
||||
jobs:
|
||||
release:
|
||||
# depending on default permission settings for your org (contents being read-only or read-write for workloads), you will have to add permissions
|
||||
# see: https://docs.github.com/en/actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
||||
permissions:
|
||||
contents: write
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
- name: Configure Git
|
||||
run: |
|
||||
git config user.name "$GITHUB_ACTOR"
|
||||
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
||||
|
||||
- name: Run chart-releaser
|
||||
uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0
|
||||
env:
|
||||
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
|
||||
- name: Update gh-pages with Chart README and Index
|
||||
run: |
|
||||
# Get the repository name
|
||||
REPO_NAME="${GITHUB_REPOSITORY##*/}"
|
||||
REPO_OWNER="${GITHUB_REPOSITORY%/*}"
|
||||
|
||||
# Switch to gh-pages branch
|
||||
git fetch origin gh-pages
|
||||
git checkout gh-pages
|
||||
|
||||
# Copy Chart README to root
|
||||
git checkout ${GITHUB_REF#refs/tags/} -- charts/nextcloud-mcp-server/README.md
|
||||
mv charts/nextcloud-mcp-server/README.md README.md || true
|
||||
rm -rf charts 2>/dev/null || true
|
||||
|
||||
# Create index.html with installation instructions
|
||||
cat > index.html <<'EOF'
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Nextcloud MCP Server Helm Chart</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
||||
max-width: 800px;
|
||||
margin: 50px auto;
|
||||
padding: 20px;
|
||||
line-height: 1.6;
|
||||
}
|
||||
code {
|
||||
background: #f4f4f4;
|
||||
padding: 2px 6px;
|
||||
border-radius: 3px;
|
||||
font-family: "Monaco", "Courier New", monospace;
|
||||
}
|
||||
pre {
|
||||
background: #f4f4f4;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
h1, h2 { color: #0082c9; }
|
||||
a { color: #0082c9; text-decoration: none; }
|
||||
a:hover { text-decoration: underline; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Nextcloud MCP Server Helm Chart</h1>
|
||||
|
||||
<p>A Helm chart for deploying the Nextcloud MCP (Model Context Protocol) Server on Kubernetes, enabling AI assistants to interact with your Nextcloud instance.</p>
|
||||
|
||||
<h2>Installation</h2>
|
||||
|
||||
<p>Add the Helm repository:</p>
|
||||
<pre><code>helm repo add nextcloud-mcp https://REPO_OWNER.github.io/REPO_NAME/
|
||||
helm repo update</code></pre>
|
||||
|
||||
<p>Install the chart:</p>
|
||||
<pre><code>helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \
|
||||
--set nextcloud.host=https://cloud.example.com \
|
||||
--set auth.basic.username=myuser \
|
||||
--set auth.basic.password=mypassword</code></pre>
|
||||
|
||||
<h2>Documentation</h2>
|
||||
|
||||
<ul>
|
||||
<li><a href="README.md">Chart README</a> - Full documentation for the Helm chart</li>
|
||||
<li><a href="https://github.com/REPO_OWNER/REPO_NAME">GitHub Repository</a> - Source code and issues</li>
|
||||
<li><a href="index.yaml">Helm Repository Index</a> - Chart metadata</li>
|
||||
</ul>
|
||||
|
||||
<h2>Quick Start</h2>
|
||||
|
||||
<p>See the <a href="README.md">full documentation</a> for detailed configuration options, examples, and troubleshooting guides.</p>
|
||||
|
||||
<hr>
|
||||
<p><small>Generated by <a href="https://github.com/helm/chart-releaser">chart-releaser</a></small></p>
|
||||
</body>
|
||||
</html>
|
||||
EOF
|
||||
|
||||
# Replace placeholders
|
||||
sed -i "s/REPO_OWNER/$REPO_OWNER/g" index.html
|
||||
sed -i "s/REPO_NAME/$REPO_NAME/g" index.html
|
||||
|
||||
# Commit changes
|
||||
git add README.md index.html
|
||||
git commit -m "Update README and index from chart release" || echo "No changes to commit"
|
||||
git push origin gh-pages
|
||||
@@ -20,7 +20,7 @@ jobs:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
- name: Install Python 3.11
|
||||
run: uv python install 3.11
|
||||
- name: Build
|
||||
|
||||
@@ -11,7 +11,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
- name: Check format
|
||||
run: |
|
||||
uv run --frozen ruff format --diff
|
||||
@@ -32,7 +32,7 @@ jobs:
|
||||
###### Required to build OIDC App ######
|
||||
|
||||
- name: Set up php 8.4
|
||||
uses: shivammathur/setup-php@v2
|
||||
uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
|
||||
with:
|
||||
php-version: 8.4
|
||||
coverage: none
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
up-flags: "--build"
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
|
||||
- name: Install Playwright dependencies
|
||||
run: |
|
||||
|
||||
@@ -1,3 +1,91 @@
|
||||
## v0.22.7 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Remove image tag overide
|
||||
|
||||
## v0.22.6 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm chart with extraArgs
|
||||
|
||||
## v0.22.5 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update helm chart variables
|
||||
|
||||
## v0.22.4 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## v0.22.3 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## v0.22.2 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## v0.22.1 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- Trigger release
|
||||
|
||||
## v0.22.0 (2025-10-29)
|
||||
|
||||
### Feat
|
||||
|
||||
- **server**: Add /live & /health endpoints
|
||||
- Initialize helm chart
|
||||
|
||||
## v0.21.0 (2025-10-25)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add text processing background worker for telling client about progress
|
||||
|
||||
### Refactor
|
||||
|
||||
- Transform document parsing into pluggable processor architecture
|
||||
|
||||
## v0.20.0 (2025-10-24)
|
||||
|
||||
### Feat
|
||||
|
||||
- **auth**: Add support for client registration deletion
|
||||
- Split read/write scopes into app:read/write scopes
|
||||
|
||||
### Fix
|
||||
|
||||
- Add support for RFC 7592 client registration and deletion
|
||||
- Update webdav models for proper serialization
|
||||
|
||||
## v0.19.1 (2025-10-24)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.19,<1.20
|
||||
|
||||
## v0.19.0 (2025-10-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Enable token introspection for opaque tokens
|
||||
|
||||
### Fix
|
||||
|
||||
- Add CORS middleware to allow browser-based clients like MCP Inspector
|
||||
|
||||
## v0.18.0 (2025-10-23)
|
||||
|
||||
### Feat
|
||||
|
||||
@@ -38,6 +38,8 @@ uv run pytest -m integration -v
|
||||
uv run pytest -m "not integration" -v
|
||||
```
|
||||
|
||||
! Hint: If the tests are failing due to missing environment variables, then usually the correct .env has not been created or not correctly configured yet.
|
||||
|
||||
### Load Testing
|
||||
```bash
|
||||
# Run benchmark with default settings (10 workers, 30 seconds)
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
FROM ghcr.io/astral-sh/uv:0.9.5-python3.11-alpine@sha256:64ecec379ff82bea84b8a80c0b374f6392bcd54aa52f8c63c12f510f9c0b214d
|
||||
FROM ghcr.io/astral-sh/uv:0.9.6-python3.11-alpine@sha256:b2a366adae7002a23dbba79791baac4e607ee5af5d45039d072d30115c505666
|
||||
|
||||
# Install git (required for caldav dependency from git)
|
||||
RUN apk add --no-cache git
|
||||
|
||||
@@ -23,6 +23,7 @@ The Nextcloud MCP (Model Context Protocol) server allows Large Language Models l
|
||||
| **Calendar** | ✅ Full CalDAV + tasks (20+ tools) | ✅ Events, free/busy, tasks (4 tools) |
|
||||
| **Contacts** | ✅ Full CardDAV (8 tools) | ✅ Find person, current user (2 tools) |
|
||||
| **Files (WebDAV)** | ✅ Full filesystem access (12 tools) | ✅ Read, folder tree, sharing (3 tools) |
|
||||
| **Document Processing** | ✅ OCR with progress (PDF, DOCX, images) | ❌ Not implemented |
|
||||
| **Deck** | ✅ Full project management (15 tools) | ✅ Basic board/card ops (2 tools) |
|
||||
| **Tables** | ✅ Row operations (5 tools) | ❌ Not implemented |
|
||||
| **Cookbook** | ✅ Full recipe management (13 tools) | ❌ Not implemented |
|
||||
@@ -71,9 +72,17 @@ uv sync
|
||||
|
||||
# Or using Docker
|
||||
docker pull ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
|
||||
# Or deploy to Kubernetes with Helm
|
||||
helm repo add nextcloud-mcp https://cbcoutinho.github.io/nextcloud-mcp-server
|
||||
helm repo update
|
||||
helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \
|
||||
--set nextcloud.host=https://cloud.example.com \
|
||||
--set auth.basic.username=myuser \
|
||||
--set auth.basic.password=mypassword
|
||||
```
|
||||
|
||||
See [Installation Guide](docs/installation.md) for detailed instructions.
|
||||
See [Installation Guide](docs/installation.md) for detailed instructions, or [Helm Chart README](charts/nextcloud-mcp-server/README.md) for Kubernetes deployment.
|
||||
|
||||
### 2. Configure
|
||||
|
||||
@@ -185,18 +194,67 @@ The server exposes Nextcloud functionality through MCP tools (for actions) and r
|
||||
|
||||
The server provides 90+ tools across 8 Nextcloud apps. When using OAuth, tools are dynamically filtered based on your granted scopes.
|
||||
|
||||
For a complete list of all supported OAuth scopes and their descriptions, see [OAuth Scopes Documentation](docs/oauth-architecture.md#oauth-scopes).
|
||||
|
||||
#### Available Tool Categories
|
||||
|
||||
| App | Tools | Read Scope | Write Scope | Operations |
|
||||
|-----|-------|-----------|-------------|------------|
|
||||
| **Notes** | 7 | `mcp:notes:read` | `mcp:notes:write` | Create, read, update, delete, search notes |
|
||||
| **Calendar** | 20+ | `mcp:calendar:read` | `mcp:calendar:write` | Events, todos (tasks), calendars, recurring events, attendees |
|
||||
| **Contacts** | 8 | `mcp:contacts:read` | `mcp:contacts:write` | Create, read, update, delete contacts and address books |
|
||||
| **Files (WebDAV)** | 12 | `mcp:files:read` | `mcp:files:write` | List, read, upload, delete, move files and folders |
|
||||
| **Deck** | 15 | `mcp:deck:read` | `mcp:deck:write` | Boards, stacks, cards, labels, assignments |
|
||||
| **Cookbook** | 13 | `mcp:cookbook:read` | `mcp:cookbook:write` | Recipes, import from URLs, search, categories |
|
||||
| **Tables** | 5 | `mcp:tables:read` | `mcp:tables:write` | Row operations on Nextcloud Tables |
|
||||
| **Sharing** | 10+ | `mcp:sharing:read` | `mcp:sharing:write` | Create, manage, delete shares |
|
||||
| **Notes** | 7 | `notes:read` | `notes:write` | Create, read, update, delete, search notes |
|
||||
| **Calendar** | 20+ | `calendar:read` `todo:read` | `calendar:write` `todo:write` | Events, todos (tasks), calendars, recurring events, attendees |
|
||||
| **Contacts** | 8 | `contacts:read` | `contacts:write` | Create, read, update, delete contacts and address books |
|
||||
| **Files (WebDAV)** | 12 | `files:read` | `files:write` | List, read, upload, delete, move files; **OCR/document processing** |
|
||||
| **Deck** | 15 | `deck:read` | `deck:write` | Boards, stacks, cards, labels, assignments |
|
||||
| **Cookbook** | 13 | `cookbook:read` | `cookbook:write` | Recipes, import from URLs, search, categories |
|
||||
| **Tables** | 5 | `tables:read` | `tables:write` | Row operations on Nextcloud Tables |
|
||||
| **Sharing** | 10+ | `sharing:read` | `sharing:write` | Create, manage, delete shares |
|
||||
|
||||
#### Document Processing (Optional)
|
||||
|
||||
The WebDAV file reading tool (`nc_webdav_read_file`) supports **automatic text extraction** from documents and images:
|
||||
|
||||
**Supported Formats:**
|
||||
- **Documents**: PDF, DOCX, PPTX, XLSX, RTF, ODT, EPUB
|
||||
- **Images**: PNG, JPEG, TIFF, BMP (with OCR)
|
||||
- **Email**: EML, MSG files
|
||||
|
||||
**Features:**
|
||||
- **Progress Notifications**: Long-running OCR operations (up to 120s) send progress updates every 10 seconds to prevent client timeouts
|
||||
- **Pluggable Architecture**: Multiple processor backends (Unstructured.io, Tesseract, custom HTTP APIs)
|
||||
- **Automatic Detection**: Files are processed based on MIME type
|
||||
- **Graceful Fallback**: Returns base64-encoded content if processing fails
|
||||
|
||||
**Configuration:**
|
||||
```dotenv
|
||||
# Enable document processing (optional)
|
||||
ENABLE_DOCUMENT_PROCESSING=true
|
||||
|
||||
# Unstructured.io processor (cloud/API-based, supports many formats)
|
||||
ENABLE_UNSTRUCTURED=true
|
||||
UNSTRUCTURED_API_URL=http://localhost:8002
|
||||
UNSTRUCTURED_STRATEGY=auto # auto, fast, or hi_res
|
||||
UNSTRUCTURED_LANGUAGES=eng,deu
|
||||
PROGRESS_INTERVAL=10 # Progress update interval in seconds
|
||||
|
||||
# Tesseract processor (local OCR, images only)
|
||||
ENABLE_TESSERACT=false
|
||||
TESSERACT_LANG=eng
|
||||
|
||||
# Custom HTTP processor
|
||||
ENABLE_CUSTOM_PROCESSOR=false
|
||||
CUSTOM_PROCESSOR_URL=http://localhost:9000/process
|
||||
CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg
|
||||
```
|
||||
|
||||
**Example Usage:**
|
||||
```
|
||||
AI: "Read the contents of Documents/report.pdf"
|
||||
→ Uses nc_webdav_read_file tool with automatic OCR processing
|
||||
→ Returns extracted text with parsing metadata
|
||||
→ Sends progress updates during long operations
|
||||
```
|
||||
|
||||
See [env.sample](env.sample) for complete configuration options.
|
||||
|
||||
**Example Tools:**
|
||||
- `nc_notes_create_note` - Create a new note
|
||||
@@ -209,7 +267,7 @@ The server provides 90+ tools across 8 Nextcloud apps. When using OAuth, tools a
|
||||
- And 80+ more...
|
||||
|
||||
> [!TIP]
|
||||
> **OAuth Scope Filtering**: When connecting via OAuth, MCP clients will only see tools for which you've granted access. For example, granting only `mcp:notes:read` and `mcp:notes:write` will show 7 Notes tools instead of all 90+ tools. See [OAuth Troubleshooting - Limited Scopes](docs/oauth-troubleshooting.md#limited-scopes---only-seeing-notes-tools) if you're only seeing a subset of tools.
|
||||
> **OAuth Scope Filtering**: When connecting via OAuth, MCP clients will only see tools for which you've granted access. For example, granting only `notes:read` and `notes:write` will show 7 Notes tools instead of all 90+ tools. See [OAuth Scopes Documentation](docs/oauth-architecture.md#oauth-scopes) for the complete scope reference, or [OAuth Troubleshooting - Limited Scopes](docs/oauth-troubleshooting.md#limited-scopes---only-seeing-notes-tools) if you're only seeing a subset of tools.
|
||||
>
|
||||
> **Known Issue**: Claude Code and some other MCP clients may only request/grant Notes scopes during initial connection. Track progress at [#234](https://github.com/cbcoutinho/nextcloud-mcp-server/issues/234).
|
||||
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
@@ -0,0 +1,23 @@
|
||||
apiVersion: v2
|
||||
name: nextcloud-mcp-server
|
||||
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
|
||||
type: application
|
||||
version: 0.22.7
|
||||
appVersion: "0.22.7"
|
||||
keywords:
|
||||
- nextcloud
|
||||
- mcp
|
||||
- model-context-protocol
|
||||
- llm
|
||||
- ai
|
||||
- claude
|
||||
- webdav
|
||||
- caldav
|
||||
- carddav
|
||||
maintainers:
|
||||
- name: Chris Coutinho
|
||||
email: chris@coutinho.io
|
||||
home: https://github.com/cbcoutinho/nextcloud-mcp-server
|
||||
sources:
|
||||
- https://github.com/cbcoutinho/nextcloud-mcp-server
|
||||
icon: https://raw.githubusercontent.com/nextcloud/server/master/core/img/logo/logo.svg
|
||||
@@ -0,0 +1,489 @@
|
||||
# Nextcloud MCP Server Helm Chart
|
||||
|
||||
This Helm chart deploys the Nextcloud MCP (Model Context Protocol) Server on a Kubernetes cluster, enabling AI assistants to interact with your Nextcloud instance.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Kubernetes 1.19+
|
||||
- Helm 3.0+
|
||||
- A running Nextcloud instance (accessible from the Kubernetes cluster)
|
||||
- Nextcloud credentials (username/password for basic auth OR OAuth client for OAuth mode)
|
||||
|
||||
## Installation
|
||||
|
||||
### Quick Start with Basic Authentication
|
||||
|
||||
```bash
|
||||
# Install with basic auth (recommended for most users)
|
||||
helm install nextcloud-mcp ./helm/nextcloud-mcp-server \
|
||||
--set nextcloud.host=https://cloud.example.com \
|
||||
--set auth.basic.username=myuser \
|
||||
--set auth.basic.password=mypassword
|
||||
```
|
||||
|
||||
### Using a values file
|
||||
|
||||
Create a `custom-values.yaml` file:
|
||||
|
||||
```yaml
|
||||
nextcloud:
|
||||
host: https://cloud.example.com
|
||||
|
||||
auth:
|
||||
mode: basic
|
||||
basic:
|
||||
username: myuser
|
||||
password: mypassword
|
||||
|
||||
resources:
|
||||
limits:
|
||||
cpu: 1000m
|
||||
memory: 512Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
```
|
||||
|
||||
Install with your custom values:
|
||||
|
||||
```bash
|
||||
helm install nextcloud-mcp ./helm/nextcloud-mcp-server -f custom-values.yaml
|
||||
```
|
||||
|
||||
### OAuth Authentication Mode (Experimental)
|
||||
|
||||
**Warning:** OAuth mode is experimental and requires patches to the Nextcloud `user_oidc` app. See the [Authentication Guide](https://github.com/cbcoutinho/nextcloud-mcp-server#authentication) for details.
|
||||
|
||||
```yaml
|
||||
nextcloud:
|
||||
host: https://cloud.example.com
|
||||
mcpServerUrl: https://mcp.example.com
|
||||
publicIssuerUrl: https://cloud.example.com
|
||||
|
||||
auth:
|
||||
mode: oauth
|
||||
oauth:
|
||||
# Optional: provide pre-registered client credentials
|
||||
# If not provided, will use Dynamic Client Registration
|
||||
clientId: "your-client-id"
|
||||
clientSecret: "your-client-secret"
|
||||
persistence:
|
||||
enabled: true
|
||||
size: 100Mi
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
hosts:
|
||||
- host: mcp.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: nextcloud-mcp-tls
|
||||
hosts:
|
||||
- mcp.example.com
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Key Configuration Parameters
|
||||
|
||||
#### Nextcloud Connection
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `nextcloud.host` | URL of your Nextcloud instance (required) | `""` |
|
||||
| `nextcloud.mcpServerUrl` | MCP server URL for OAuth callbacks (OAuth only, optional) | Smart default* |
|
||||
| `nextcloud.publicIssuerUrl` | Public issuer URL for OAuth (OAuth only, optional) | Smart default** |
|
||||
|
||||
**Smart Defaults:**
|
||||
- `*mcpServerUrl`: If not set, automatically uses ingress host (if enabled) or `http://localhost:8000` (for port-forward setups)
|
||||
- `**publicIssuerUrl`: If not set, automatically defaults to `nextcloud.host` (which works when both clients and MCP server access Nextcloud at the same URL)
|
||||
|
||||
#### Authentication
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `auth.mode` | Authentication mode: `basic` or `oauth` | `basic` |
|
||||
| `auth.basic.username` | Nextcloud username (basic auth) | `""` |
|
||||
| `auth.basic.password` | Nextcloud password (basic auth) | `""` |
|
||||
| `auth.basic.existingSecret` | Use existing secret for credentials | `""` |
|
||||
| `auth.oauth.clientId` | OAuth client ID (OAuth mode, optional) | `""` |
|
||||
| `auth.oauth.clientSecret` | OAuth client secret (OAuth mode, optional) | `""` |
|
||||
| `auth.oauth.persistence.enabled` | Enable persistent storage for OAuth | `true` |
|
||||
| `auth.oauth.persistence.size` | Size of OAuth storage PVC | `100Mi` |
|
||||
|
||||
#### MCP Server Configuration
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `mcp.transport` | Transport mode | `streamable-http` |
|
||||
| `mcp.port` | Server port (used by both auth modes) | `8000` |
|
||||
| `mcp.extraArgs` | Additional command-line arguments | `[]` |
|
||||
|
||||
The `extraArgs` parameter allows you to pass additional command-line arguments to the MCP server. This is useful for enabling debug logging, enabling specific apps, or other runtime configuration.
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
mcp:
|
||||
extraArgs:
|
||||
- "--log-level"
|
||||
- "debug"
|
||||
- "--enable-app"
|
||||
- "notes"
|
||||
```
|
||||
|
||||
#### Image Configuration
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `image.repository` | Container image repository | `ghcr.io/cbcoutinho/nextcloud-mcp-server` |
|
||||
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
|
||||
|
||||
**Note:** Image tag is automatically set to the chart's `appVersion` and cannot be overridden.
|
||||
|
||||
#### Resources
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `resources.limits.cpu` | CPU limit | `1000m` |
|
||||
| `resources.limits.memory` | Memory limit | `512Mi` |
|
||||
| `resources.requests.cpu` | CPU request | `100m` |
|
||||
| `resources.requests.memory` | Memory request | `128Mi` |
|
||||
|
||||
#### Service
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `service.type` | Service type | `ClusterIP` |
|
||||
| `service.port` | Service port | `8000` |
|
||||
|
||||
#### Ingress
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `ingress.enabled` | Enable ingress | `false` |
|
||||
| `ingress.className` | Ingress class name | `""` |
|
||||
| `ingress.hosts` | Ingress host configuration | See values.yaml |
|
||||
| `ingress.tls` | Ingress TLS configuration | `[]` |
|
||||
|
||||
#### Autoscaling
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `autoscaling.enabled` | Enable HPA | `false` |
|
||||
| `autoscaling.minReplicas` | Minimum replicas | `1` |
|
||||
| `autoscaling.maxReplicas` | Maximum replicas | `10` |
|
||||
| `autoscaling.targetCPUUtilizationPercentage` | Target CPU % | `80` |
|
||||
|
||||
#### Health Probes
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `livenessProbe.httpGet.path` | Liveness probe endpoint | `/health/live` |
|
||||
| `livenessProbe.initialDelaySeconds` | Initial delay for liveness | `30` |
|
||||
| `livenessProbe.periodSeconds` | Check interval for liveness | `10` |
|
||||
| `readinessProbe.httpGet.path` | Readiness probe endpoint | `/health/ready` |
|
||||
| `readinessProbe.initialDelaySeconds` | Initial delay for readiness | `10` |
|
||||
| `readinessProbe.periodSeconds` | Check interval for readiness | `5` |
|
||||
|
||||
The application exposes HTTP health check endpoints:
|
||||
- `/health/live` - Liveness probe (checks if application is running)
|
||||
- `/health/ready` - Readiness probe (checks if application is ready to serve traffic)
|
||||
|
||||
#### Document Processing (Optional)
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `documentProcessing.enabled` | Enable document processing | `false` |
|
||||
| `documentProcessing.defaultProcessor` | Default processor | `unstructured` |
|
||||
| `documentProcessing.unstructured.enabled` | Enable Unstructured.io processor | `false` |
|
||||
| `documentProcessing.unstructured.apiUrl` | Unstructured API URL | `http://unstructured:8000` |
|
||||
| `documentProcessing.tesseract.enabled` | Enable Tesseract OCR | `false` |
|
||||
|
||||
## Examples
|
||||
|
||||
### Example 1: Basic Auth with Ingress
|
||||
|
||||
```yaml
|
||||
nextcloud:
|
||||
host: https://cloud.example.com
|
||||
|
||||
auth:
|
||||
mode: basic
|
||||
basic:
|
||||
username: admin
|
||||
password: secure-password
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- host: mcp.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: mcp-tls
|
||||
hosts:
|
||||
- mcp.example.com
|
||||
|
||||
resources:
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 1Gi
|
||||
requests:
|
||||
cpu: 200m
|
||||
memory: 256Mi
|
||||
```
|
||||
|
||||
### Example 2: Using Existing Secrets
|
||||
|
||||
#### Basic Auth with Existing Secret
|
||||
|
||||
Create a secret manually:
|
||||
|
||||
```bash
|
||||
kubectl create secret generic nextcloud-credentials \
|
||||
--from-literal=username=myuser \
|
||||
--from-literal=password=mypassword
|
||||
```
|
||||
|
||||
Then reference it in your values:
|
||||
|
||||
```yaml
|
||||
nextcloud:
|
||||
host: https://cloud.example.com
|
||||
|
||||
auth:
|
||||
mode: basic
|
||||
basic:
|
||||
existingSecret: nextcloud-credentials
|
||||
usernameKey: username
|
||||
passwordKey: password
|
||||
```
|
||||
|
||||
#### OAuth with Existing Secret (Pre-registered Client)
|
||||
|
||||
If you have a pre-registered OAuth client:
|
||||
|
||||
```bash
|
||||
kubectl create secret generic nextcloud-oauth-creds \
|
||||
--from-literal=clientId=my-oauth-client-id \
|
||||
--from-literal=clientSecret=my-oauth-client-secret
|
||||
```
|
||||
|
||||
Then reference it in your values:
|
||||
|
||||
```yaml
|
||||
nextcloud:
|
||||
host: https://cloud.example.com
|
||||
# mcpServerUrl and publicIssuerUrl are optional!
|
||||
# If not set, mcpServerUrl defaults to ingress host or localhost
|
||||
# publicIssuerUrl defaults to nextcloud.host
|
||||
|
||||
auth:
|
||||
mode: oauth
|
||||
oauth:
|
||||
existingSecret: nextcloud-oauth-creds
|
||||
clientIdKey: clientId
|
||||
clientSecretKey: clientSecret
|
||||
persistence:
|
||||
enabled: true
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
hosts:
|
||||
- host: mcp.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls:
|
||||
- secretName: mcp-tls
|
||||
hosts:
|
||||
- mcp.example.com
|
||||
```
|
||||
|
||||
### Example 3: OAuth with Document Processing and Dynamic Client Registration
|
||||
|
||||
This example shows OAuth without pre-registered credentials (using DCR) and optional URL values:
|
||||
|
||||
```yaml
|
||||
nextcloud:
|
||||
host: https://cloud.example.com
|
||||
# mcpServerUrl will automatically use ingress host (https://mcp.example.com)
|
||||
# publicIssuerUrl will automatically default to nextcloud.host
|
||||
|
||||
auth:
|
||||
mode: oauth
|
||||
oauth:
|
||||
# No clientId/clientSecret - will use Dynamic Client Registration!
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: fast-ssd
|
||||
size: 200Mi
|
||||
|
||||
documentProcessing:
|
||||
enabled: true
|
||||
defaultProcessor: unstructured
|
||||
unstructured:
|
||||
enabled: true
|
||||
apiUrl: http://unstructured-api:8000
|
||||
strategy: hi_res
|
||||
languages: eng,deu,fra
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: nginx
|
||||
hosts:
|
||||
- host: mcp.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
```
|
||||
|
||||
### Example 4: High Availability with Autoscaling
|
||||
|
||||
```yaml
|
||||
replicaCount: 2
|
||||
|
||||
autoscaling:
|
||||
enabled: true
|
||||
minReplicas: 2
|
||||
maxReplicas: 20
|
||||
targetCPUUtilizationPercentage: 70
|
||||
targetMemoryUtilizationPercentage: 80
|
||||
|
||||
resources:
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 1Gi
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
|
||||
affinity:
|
||||
podAntiAffinity:
|
||||
preferredDuringSchedulingIgnoredDuringExecution:
|
||||
- weight: 100
|
||||
podAffinityTerm:
|
||||
labelSelector:
|
||||
matchExpressions:
|
||||
- key: app.kubernetes.io/name
|
||||
operator: In
|
||||
values:
|
||||
- nextcloud-mcp-server
|
||||
topologyKey: kubernetes.io/hostname
|
||||
```
|
||||
|
||||
## Upgrading
|
||||
|
||||
### To upgrade an existing deployment:
|
||||
|
||||
```bash
|
||||
helm upgrade nextcloud-mcp ./helm/nextcloud-mcp-server -f custom-values.yaml
|
||||
```
|
||||
|
||||
### To upgrade with new values:
|
||||
|
||||
```bash
|
||||
helm upgrade nextcloud-mcp ./helm/nextcloud-mcp-server \
|
||||
--set resources.limits.memory=1Gi
|
||||
```
|
||||
|
||||
## Uninstalling
|
||||
|
||||
```bash
|
||||
helm uninstall nextcloud-mcp
|
||||
```
|
||||
|
||||
**Note:** This will delete all resources including PVCs. If you want to preserve OAuth client data, backup the PVC before uninstalling.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Check pod status
|
||||
|
||||
```bash
|
||||
kubectl get pods -l app.kubernetes.io/name=nextcloud-mcp-server
|
||||
```
|
||||
|
||||
### View logs
|
||||
|
||||
```bash
|
||||
kubectl logs -l app.kubernetes.io/name=nextcloud-mcp-server --tail=100 -f
|
||||
```
|
||||
|
||||
### Check health endpoints
|
||||
|
||||
The application exposes health check endpoints for monitoring:
|
||||
|
||||
```bash
|
||||
# Port forward to the service
|
||||
kubectl port-forward svc/nextcloud-mcp 8000:8000
|
||||
|
||||
# Check liveness (if app is running)
|
||||
curl http://localhost:8000/health/live
|
||||
|
||||
# Check readiness (if app is ready to serve traffic)
|
||||
curl http://localhost:8000/health/ready
|
||||
```
|
||||
|
||||
**Example responses:**
|
||||
|
||||
Liveness (always returns 200 if running):
|
||||
```json
|
||||
{
|
||||
"status": "alive",
|
||||
"mode": "basic"
|
||||
}
|
||||
```
|
||||
|
||||
Readiness (returns 200 if ready, 503 if not ready):
|
||||
```json
|
||||
{
|
||||
"status": "ready",
|
||||
"checks": {
|
||||
"nextcloud_configured": "ok",
|
||||
"auth_mode": "basic",
|
||||
"auth_configured": "ok"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Common Issues
|
||||
|
||||
1. **Connection refused to Nextcloud**
|
||||
- Verify `nextcloud.host` is accessible from the Kubernetes cluster
|
||||
- Check network policies and firewall rules
|
||||
|
||||
2. **Authentication failures**
|
||||
- For basic auth: verify username/password are correct
|
||||
- For OAuth: check that OIDC app is properly configured
|
||||
|
||||
3. **OAuth persistence issues**
|
||||
- Verify PVC is bound: `kubectl get pvc`
|
||||
- Check storage class exists: `kubectl get storageclass`
|
||||
|
||||
4. **Resource constraints**
|
||||
- Increase memory limits if seeing OOM errors
|
||||
- Adjust CPU requests based on load
|
||||
|
||||
## Security Considerations
|
||||
|
||||
1. **Secrets Management**: Consider using external secret management (e.g., Sealed Secrets, External Secrets Operator)
|
||||
2. **TLS**: Always use TLS/HTTPS for production deployments
|
||||
3. **Network Policies**: Restrict network access to necessary services only
|
||||
4. **RBAC**: Review and customize ServiceAccount permissions as needed
|
||||
5. **App Passwords**: For basic auth, use Nextcloud app passwords instead of main account passwords
|
||||
|
||||
## Support
|
||||
|
||||
- GitHub Issues: https://github.com/cbcoutinho/nextcloud-mcp-server/issues
|
||||
- Documentation: https://github.com/cbcoutinho/nextcloud-mcp-server#readme
|
||||
|
||||
## License
|
||||
|
||||
This chart is licensed under AGPL-3.0, consistent with the Nextcloud MCP Server project.
|
||||
@@ -0,0 +1,80 @@
|
||||
Thank you for installing {{ .Chart.Name }}!
|
||||
|
||||
Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentication mode.
|
||||
|
||||
1. Get the application URL by running these commands:
|
||||
{{- if .Values.ingress.enabled }}
|
||||
{{- range $host := .Values.ingress.hosts }}
|
||||
{{- range .paths }}
|
||||
http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if contains "NodePort" .Values.service.type }}
|
||||
export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "nextcloud-mcp-server.fullname" . }})
|
||||
export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
|
||||
echo http://$NODE_IP:$NODE_PORT
|
||||
{{- else if contains "LoadBalancer" .Values.service.type }}
|
||||
NOTE: It may take a few minutes for the LoadBalancer IP to be available.
|
||||
You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "nextcloud-mcp-server.fullname" . }}'
|
||||
export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "nextcloud-mcp-server.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}")
|
||||
echo http://$SERVICE_IP:{{ .Values.service.port }}
|
||||
{{- else if contains "ClusterIP" .Values.service.type }}
|
||||
export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "nextcloud-mcp-server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
|
||||
export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
|
||||
echo "Visit http://127.0.0.1:8080 to use your MCP server"
|
||||
kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT
|
||||
{{- end }}
|
||||
|
||||
2. Check the deployment status:
|
||||
kubectl --namespace {{ .Release.Namespace }} get pods -l "app.kubernetes.io/name={{ include "nextcloud-mcp-server.name" . }},app.kubernetes.io/instance={{ .Release.Name }}"
|
||||
|
||||
{{- if eq .Values.auth.mode "basic" }}
|
||||
|
||||
3. Basic Authentication Mode:
|
||||
{{- if .Values.auth.basic.existingSecret }}
|
||||
- Credentials: (using existing secret {{ .Values.auth.basic.existingSecret }})
|
||||
{{- else }}
|
||||
- Username: {{ .Values.auth.basic.username }}
|
||||
- Password: (stored in secret {{ include "nextcloud-mcp-server.basicAuthSecretName" . }})
|
||||
{{- end }}
|
||||
- Connected to: {{ .Values.nextcloud.host }}
|
||||
{{- else if eq .Values.auth.mode "oauth" }}
|
||||
|
||||
3. OAuth Authentication Mode:
|
||||
- Server URL: {{ include "nextcloud-mcp-server.mcpServerUrl" . }}
|
||||
- Issuer URL: {{ include "nextcloud-mcp-server.publicIssuerUrl" . }}
|
||||
- Connected to: {{ .Values.nextcloud.host }}
|
||||
{{- if .Values.auth.oauth.existingSecret }}
|
||||
- Using existing OAuth client secret: {{ .Values.auth.oauth.existingSecret }}
|
||||
{{- else if and .Values.auth.oauth.clientId .Values.auth.oauth.clientSecret }}
|
||||
- Using pre-registered OAuth client
|
||||
{{- else }}
|
||||
- Using Dynamic Client Registration (DCR)
|
||||
{{- end }}
|
||||
{{- if .Values.auth.oauth.persistence.enabled }}
|
||||
- OAuth client credentials are persisted in PVC: {{ include "nextcloud-mcp-server.oauthPvcName" . }}
|
||||
{{- end }}
|
||||
|
||||
IMPORTANT: OAuth mode is experimental and requires patches to the user_oidc app.
|
||||
See: https://github.com/cbcoutinho/nextcloud-mcp-server#authentication
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.documentProcessing.enabled }}
|
||||
|
||||
4. Document Processing:
|
||||
- Enabled: {{ .Values.documentProcessing.enabled }}
|
||||
- Default processor: {{ .Values.documentProcessing.defaultProcessor }}
|
||||
{{- if .Values.documentProcessing.unstructured.enabled }}
|
||||
- Unstructured API: {{ .Values.documentProcessing.unstructured.apiUrl }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
For more information and documentation:
|
||||
- GitHub: https://github.com/cbcoutinho/nextcloud-mcp-server
|
||||
- Documentation: https://github.com/cbcoutinho/nextcloud-mcp-server#readme
|
||||
|
||||
To upgrade this deployment:
|
||||
helm upgrade {{ .Release.Name }} nextcloud-mcp-server
|
||||
|
||||
To uninstall:
|
||||
helm uninstall {{ .Release.Name }}
|
||||
@@ -0,0 +1,142 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
|
||||
If release name contains chart name it will be used as a full name.
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.labels" -}}
|
||||
helm.sh/chart: {{ include "nextcloud-mcp-server.chart" . }}
|
||||
{{ include "nextcloud-mcp-server.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "nextcloud-mcp-server.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "nextcloud-mcp-server.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the secret to use for basic auth
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.basicAuthSecretName" -}}
|
||||
{{- if .Values.auth.basic.existingSecret }}
|
||||
{{- .Values.auth.basic.existingSecret }}
|
||||
{{- else }}
|
||||
{{- include "nextcloud-mcp-server.fullname" . }}-basic-auth
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the secret to use for OAuth
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.oauthSecretName" -}}
|
||||
{{- if .Values.auth.oauth.existingSecret }}
|
||||
{{- .Values.auth.oauth.existingSecret }}
|
||||
{{- else }}
|
||||
{{- include "nextcloud-mcp-server.fullname" . }}-oauth
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the PVC to use for OAuth storage
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.oauthPvcName" -}}
|
||||
{{- if .Values.auth.oauth.persistence.existingClaim }}
|
||||
{{- .Values.auth.oauth.persistence.existingClaim }}
|
||||
{{- else }}
|
||||
{{- include "nextcloud-mcp-server.fullname" . }}-oauth-storage
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Return the MCP server port
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.port" -}}
|
||||
{{- .Values.mcp.port }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Return the image tag (always uses chart appVersion)
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.imageTag" -}}
|
||||
{{- .Chart.AppVersion }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Return the public issuer URL for OAuth
|
||||
Defaults to nextcloud.host if not specified
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.publicIssuerUrl" -}}
|
||||
{{- if .Values.nextcloud.publicIssuerUrl }}
|
||||
{{- .Values.nextcloud.publicIssuerUrl }}
|
||||
{{- else }}
|
||||
{{- .Values.nextcloud.host }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Return the MCP server URL for OAuth callbacks
|
||||
If not specified:
|
||||
- Uses ingress host if ingress is enabled
|
||||
- Otherwise defaults to http://localhost:8000 (for port-forward setups)
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.mcpServerUrl" -}}
|
||||
{{- if .Values.nextcloud.mcpServerUrl }}
|
||||
{{- .Values.nextcloud.mcpServerUrl }}
|
||||
{{- else if .Values.ingress.enabled }}
|
||||
{{- $host := index .Values.ingress.hosts 0 }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
{{- printf "https://%s" $host.host }}
|
||||
{{- else }}
|
||||
{{- printf "http://%s" $host.host }}
|
||||
{{- end }}
|
||||
{{- else }}
|
||||
{{- printf "http://localhost:%d" (int .Values.mcp.port) }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,190 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
spec:
|
||||
{{- if not .Values.autoscaling.enabled }}
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
{{- end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "nextcloud-mcp-server.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
annotations:
|
||||
checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
|
||||
{{- with .Values.podAnnotations }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 8 }}
|
||||
{{- with .Values.podLabels }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "nextcloud-mcp-server.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
||||
{{- with .Values.initContainers }}
|
||||
initContainers:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.securityContext | nindent 12 }}
|
||||
image: "{{ .Values.image.repository }}:{{ include "nextcloud-mcp-server.imageTag" . }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
args:
|
||||
- "--transport"
|
||||
- "{{ .Values.mcp.transport }}"
|
||||
{{- if eq .Values.auth.mode "oauth" }}
|
||||
- "--oauth"
|
||||
- "--oauth-token-type"
|
||||
- "{{ .Values.auth.oauth.tokenType }}"
|
||||
{{- end }}
|
||||
{{- with .Values.mcp.extraArgs }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ include "nextcloud-mcp-server.port" . }}
|
||||
protocol: TCP
|
||||
env:
|
||||
# Nextcloud connection
|
||||
- name: NEXTCLOUD_HOST
|
||||
value: {{ .Values.nextcloud.host | quote }}
|
||||
{{- if eq .Values.auth.mode "basic" }}
|
||||
# Basic auth mode
|
||||
- name: NEXTCLOUD_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.basicAuthSecretName" . }}
|
||||
key: {{ .Values.auth.basic.usernameKey }}
|
||||
- name: NEXTCLOUD_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.basicAuthSecretName" . }}
|
||||
key: {{ .Values.auth.basic.passwordKey }}
|
||||
{{- else if eq .Values.auth.mode "oauth" }}
|
||||
# OAuth mode
|
||||
- name: NEXTCLOUD_MCP_SERVER_URL
|
||||
value: {{ include "nextcloud-mcp-server.mcpServerUrl" . | quote }}
|
||||
- name: NEXTCLOUD_PUBLIC_ISSUER_URL
|
||||
value: {{ include "nextcloud-mcp-server.publicIssuerUrl" . | quote }}
|
||||
- name: NEXTCLOUD_OIDC_CLIENT_STORAGE
|
||||
value: "/app/.oauth/nextcloud_oauth_client.json"
|
||||
- name: NEXTCLOUD_OIDC_SCOPES
|
||||
value: {{ .Values.auth.oauth.scopes | quote }}
|
||||
{{- if .Values.auth.oauth.clientId }}
|
||||
- name: NEXTCLOUD_OIDC_CLIENT_ID
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.oauthSecretName" . }}
|
||||
key: {{ .Values.auth.oauth.clientIdKey }}
|
||||
- name: NEXTCLOUD_OIDC_CLIENT_SECRET
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.oauthSecretName" . }}
|
||||
key: {{ .Values.auth.oauth.clientSecretKey }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if .Values.documentProcessing.enabled }}
|
||||
# Document processing
|
||||
- name: ENABLE_DOCUMENT_PROCESSING
|
||||
value: {{ .Values.documentProcessing.enabled | quote }}
|
||||
- name: DOCUMENT_PROCESSOR
|
||||
value: {{ .Values.documentProcessing.defaultProcessor | quote }}
|
||||
- name: PROGRESS_INTERVAL
|
||||
value: {{ .Values.documentProcessing.progressInterval | quote }}
|
||||
{{- if .Values.documentProcessing.unstructured.enabled }}
|
||||
- name: ENABLE_UNSTRUCTURED
|
||||
value: "true"
|
||||
- name: UNSTRUCTURED_API_URL
|
||||
value: {{ .Values.documentProcessing.unstructured.apiUrl | quote }}
|
||||
- name: UNSTRUCTURED_TIMEOUT
|
||||
value: {{ .Values.documentProcessing.unstructured.timeout | quote }}
|
||||
- name: UNSTRUCTURED_STRATEGY
|
||||
value: {{ .Values.documentProcessing.unstructured.strategy | quote }}
|
||||
- name: UNSTRUCTURED_LANGUAGES
|
||||
value: {{ .Values.documentProcessing.unstructured.languages | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.documentProcessing.tesseract.enabled }}
|
||||
- name: ENABLE_TESSERACT
|
||||
value: "true"
|
||||
{{- if .Values.documentProcessing.tesseract.cmd }}
|
||||
- name: TESSERACT_CMD
|
||||
value: {{ .Values.documentProcessing.tesseract.cmd | quote }}
|
||||
{{- end }}
|
||||
- name: TESSERACT_LANG
|
||||
value: {{ .Values.documentProcessing.tesseract.lang | quote }}
|
||||
{{- end }}
|
||||
{{- if .Values.documentProcessing.custom.enabled }}
|
||||
- name: ENABLE_CUSTOM_PROCESSOR
|
||||
value: "true"
|
||||
- name: CUSTOM_PROCESSOR_NAME
|
||||
value: {{ .Values.documentProcessing.custom.name | quote }}
|
||||
- name: CUSTOM_PROCESSOR_URL
|
||||
value: {{ .Values.documentProcessing.custom.url | quote }}
|
||||
{{- if .Values.documentProcessing.custom.apiKey }}
|
||||
- name: CUSTOM_PROCESSOR_API_KEY
|
||||
value: {{ .Values.documentProcessing.custom.apiKey | quote }}
|
||||
{{- end }}
|
||||
- name: CUSTOM_PROCESSOR_TIMEOUT
|
||||
value: {{ .Values.documentProcessing.custom.timeout | quote }}
|
||||
- name: CUSTOM_PROCESSOR_TYPES
|
||||
value: {{ .Values.documentProcessing.custom.types | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- with .Values.extraEnv }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.extraEnvFrom }}
|
||||
envFrom:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
livenessProbe:
|
||||
{{- toYaml .Values.livenessProbe | nindent 12 }}
|
||||
readinessProbe:
|
||||
{{- toYaml .Values.readinessProbe | nindent 12 }}
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 12 }}
|
||||
volumeMounts:
|
||||
- name: tmp
|
||||
mountPath: /tmp
|
||||
{{- if and (eq .Values.auth.mode "oauth") .Values.auth.oauth.persistence.enabled }}
|
||||
- name: oauth-storage
|
||||
mountPath: /app/.oauth
|
||||
{{- end }}
|
||||
{{- with .Values.volumeMounts }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
volumes:
|
||||
- name: tmp
|
||||
emptyDir: {}
|
||||
{{- if and (eq .Values.auth.mode "oauth") .Values.auth.oauth.persistence.enabled }}
|
||||
- name: oauth-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ include "nextcloud-mcp-server.oauthPvcName" . }}
|
||||
{{- end }}
|
||||
{{- with .Values.volumes }}
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,32 @@
|
||||
{{- if .Values.autoscaling.enabled }}
|
||||
apiVersion: autoscaling/v2
|
||||
kind: HorizontalPodAutoscaler
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
spec:
|
||||
scaleTargetRef:
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}
|
||||
minReplicas: {{ .Values.autoscaling.minReplicas }}
|
||||
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
|
||||
metrics:
|
||||
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: cpu
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
- type: Resource
|
||||
resource:
|
||||
name: memory
|
||||
target:
|
||||
type: Utilization
|
||||
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,61 @@
|
||||
{{- if .Values.ingress.enabled -}}
|
||||
{{- $fullName := include "nextcloud-mcp-server.fullname" . -}}
|
||||
{{- $svcPort := .Values.service.port -}}
|
||||
{{- if and .Values.ingress.className (not (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion)) }}
|
||||
{{- if not (hasKey .Values.ingress.annotations "kubernetes.io/ingress.class") }}
|
||||
{{- $_ := set .Values.ingress.annotations "kubernetes.io/ingress.class" .Values.ingress.className}}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- if semverCompare ">=1.19-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1
|
||||
{{- else if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}}
|
||||
apiVersion: networking.k8s.io/v1beta1
|
||||
{{- else -}}
|
||||
apiVersion: extensions/v1beta1
|
||||
{{- end }}
|
||||
kind: Ingress
|
||||
metadata:
|
||||
name: {{ $fullName }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.ingress.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if and .Values.ingress.className (semverCompare ">=1.18-0" .Capabilities.KubeVersion.GitVersion) }}
|
||||
ingressClassName: {{ .Values.ingress.className }}
|
||||
{{- end }}
|
||||
{{- if .Values.ingress.tls }}
|
||||
tls:
|
||||
{{- range .Values.ingress.tls }}
|
||||
- hosts:
|
||||
{{- range .hosts }}
|
||||
- {{ . | quote }}
|
||||
{{- end }}
|
||||
secretName: {{ .secretName }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
rules:
|
||||
{{- range .Values.ingress.hosts }}
|
||||
- host: {{ .host | quote }}
|
||||
http:
|
||||
paths:
|
||||
{{- range .paths }}
|
||||
- path: {{ .path }}
|
||||
{{- if and .pathType (semverCompare ">=1.18-0" $.Capabilities.KubeVersion.GitVersion) }}
|
||||
pathType: {{ .pathType }}
|
||||
{{- end }}
|
||||
backend:
|
||||
{{- if semverCompare ">=1.19-0" $.Capabilities.KubeVersion.GitVersion }}
|
||||
service:
|
||||
name: {{ $fullName }}
|
||||
port:
|
||||
number: {{ $svcPort }}
|
||||
{{- else }}
|
||||
serviceName: {{ $fullName }}
|
||||
servicePort: {{ $svcPort }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,17 @@
|
||||
{{- if and (eq .Values.auth.mode "oauth") .Values.auth.oauth.persistence.enabled (not .Values.auth.oauth.persistence.existingClaim) }}
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}-oauth-storage
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
spec:
|
||||
accessModes:
|
||||
- {{ .Values.auth.oauth.persistence.accessMode }}
|
||||
{{- if .Values.auth.oauth.persistence.storageClass }}
|
||||
storageClassName: {{ .Values.auth.oauth.persistence.storageClass }}
|
||||
{{- end }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.auth.oauth.persistence.size }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,29 @@
|
||||
{{- if eq .Values.auth.mode "basic" }}
|
||||
{{- if not .Values.auth.basic.existingSecret }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}-basic-auth
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
type: Opaque
|
||||
data:
|
||||
{{ .Values.auth.basic.usernameKey }}: {{ .Values.auth.basic.username | b64enc | quote }}
|
||||
{{ .Values.auth.basic.passwordKey }}: {{ .Values.auth.basic.password | b64enc | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if eq .Values.auth.mode "oauth" }}
|
||||
{{- if and .Values.auth.oauth.clientId (not .Values.auth.oauth.existingSecret) }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}-oauth
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
type: Opaque
|
||||
data:
|
||||
{{ .Values.auth.oauth.clientIdKey }}: {{ .Values.auth.oauth.clientId | b64enc | quote }}
|
||||
{{ .Values.auth.oauth.clientSecretKey }}: {{ .Values.auth.oauth.clientSecret | b64enc | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,19 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.service.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
name: http
|
||||
selector:
|
||||
{{- include "nextcloud-mcp-server.selectorLabels" . | nindent 4 }}
|
||||
@@ -0,0 +1,13 @@
|
||||
{{- if .Values.serviceAccount.create -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.serviceAccountName" . }}
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
automountServiceAccountToken: {{ .Values.serviceAccount.automount }}
|
||||
{{- end }}
|
||||
@@ -0,0 +1,266 @@
|
||||
# Default values for nextcloud-mcp-server
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# Number of replicas
|
||||
replicaCount: 1
|
||||
|
||||
image:
|
||||
repository: ghcr.io/cbcoutinho/nextcloud-mcp-server
|
||||
pullPolicy: IfNotPresent
|
||||
# Image tag is automatically set to chart appVersion
|
||||
|
||||
imagePullSecrets: []
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
# Nextcloud connection settings
|
||||
nextcloud:
|
||||
# URL of your Nextcloud instance (required)
|
||||
# Example: https://cloud.example.com
|
||||
host: ""
|
||||
|
||||
# MCP server URL for OAuth callbacks (OAuth mode only)
|
||||
# If not specified, will be constructed from ingress.hosts[0] if ingress is enabled,
|
||||
# or defaults to http://localhost:8000 (suitable for port-forward setups)
|
||||
# Example: https://mcp.example.com
|
||||
mcpServerUrl: ""
|
||||
|
||||
# Public issuer URL for OAuth (OAuth mode only)
|
||||
# If not specified, defaults to nextcloud.host
|
||||
# Only set this if your Nextcloud is accessible at a different URL for OAuth
|
||||
# Example: https://cloud.example.com
|
||||
publicIssuerUrl: ""
|
||||
|
||||
# Authentication configuration
|
||||
# Choose either basic auth OR oauth (not both)
|
||||
auth:
|
||||
# Authentication mode: "basic" or "oauth"
|
||||
# basic: Uses username/password (recommended for most users)
|
||||
# oauth: Uses OAuth2/OIDC (experimental, requires patches)
|
||||
mode: basic
|
||||
|
||||
# Basic authentication settings
|
||||
basic:
|
||||
# Nextcloud username (ignored if existingSecret is set)
|
||||
username: ""
|
||||
# Nextcloud password or app password (recommended) (ignored if existingSecret is set)
|
||||
password: ""
|
||||
# Use existing secret instead of creating one
|
||||
# If set, username and password above are ignored
|
||||
# Secret must contain keys specified in usernameKey and passwordKey
|
||||
# Example:
|
||||
# kubectl create secret generic my-nextcloud-creds \
|
||||
# --from-literal=username=myuser \
|
||||
# --from-literal=password=mypassword
|
||||
existingSecret: ""
|
||||
# Keys in the existing secret
|
||||
usernameKey: "username"
|
||||
passwordKey: "password"
|
||||
|
||||
# OAuth2/OIDC settings (experimental)
|
||||
oauth:
|
||||
# OAuth token type: "jwt" or "opaque"
|
||||
tokenType: "jwt"
|
||||
# Pre-registered OAuth client ID (optional, ignored if existingSecret is set)
|
||||
# If not provided and no existingSecret, will use Dynamic Client Registration (DCR)
|
||||
clientId: ""
|
||||
# Pre-registered OAuth client secret (optional, ignored if existingSecret is set)
|
||||
clientSecret: ""
|
||||
# OAuth scopes to request (space-separated)
|
||||
scopes: "openid profile email notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write todo:read todo:write"
|
||||
# Use existing secret for OAuth client credentials
|
||||
# If set, clientId and clientSecret above are ignored
|
||||
# Secret must contain keys specified in clientIdKey and clientSecretKey
|
||||
# Example:
|
||||
# kubectl create secret generic my-oauth-creds \
|
||||
# --from-literal=clientId=my-client-id \
|
||||
# --from-literal=clientSecret=my-client-secret
|
||||
existingSecret: ""
|
||||
# Keys in the existing secret
|
||||
clientIdKey: "clientId"
|
||||
clientSecretKey: "clientSecret"
|
||||
# Persistent storage for OAuth client credentials
|
||||
persistence:
|
||||
enabled: true
|
||||
# Storage class (leave empty for default)
|
||||
storageClass: ""
|
||||
accessMode: ReadWriteOnce
|
||||
size: 100Mi
|
||||
# Use existing PVC
|
||||
existingClaim: ""
|
||||
|
||||
# MCP server configuration
|
||||
mcp:
|
||||
# Transport mode (default: streamable-http for SSE)
|
||||
transport: "streamable-http"
|
||||
# Port for MCP server (both basic auth and OAuth modes)
|
||||
port: 8000
|
||||
# Additional command-line arguments to pass to nextcloud-mcp-server
|
||||
# Example: ["--log-level", "debug", "--enable-app", "notes"]
|
||||
extraArgs: []
|
||||
|
||||
# Document processing configuration (optional)
|
||||
documentProcessing:
|
||||
# Enable document processing (PDF, DOCX, images, etc.)
|
||||
enabled: false
|
||||
# Default processor: unstructured, tesseract, or custom
|
||||
defaultProcessor: "unstructured"
|
||||
# Progress reporting interval in seconds
|
||||
progressInterval: 10
|
||||
|
||||
# Unstructured.io processor
|
||||
unstructured:
|
||||
enabled: false
|
||||
# Unstructured API endpoint
|
||||
apiUrl: "http://unstructured:8000"
|
||||
# Request timeout in seconds
|
||||
timeout: 120
|
||||
# Parsing strategy: auto, fast, or hi_res
|
||||
strategy: "auto"
|
||||
# OCR languages (comma-separated ISO 639-3 codes)
|
||||
languages: "eng,deu"
|
||||
|
||||
# Tesseract processor (local OCR)
|
||||
tesseract:
|
||||
enabled: false
|
||||
# Path to tesseract executable (optional, auto-detected if in PATH)
|
||||
cmd: ""
|
||||
# OCR language (e.g., eng, deu, eng+deu for multiple)
|
||||
lang: "eng"
|
||||
|
||||
# Custom processor
|
||||
custom:
|
||||
enabled: false
|
||||
# Unique name for your processor
|
||||
name: "my_ocr"
|
||||
# Custom processor API endpoint
|
||||
url: ""
|
||||
# Optional API key for authentication
|
||||
apiKey: ""
|
||||
# Request timeout in seconds
|
||||
timeout: 60
|
||||
# Comma-separated MIME types your processor supports
|
||||
types: "application/pdf,image/jpeg,image/png"
|
||||
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: true
|
||||
# Automatically mount a ServiceAccount's API credentials?
|
||||
automount: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
|
||||
podAnnotations: {}
|
||||
podLabels: {}
|
||||
|
||||
podSecurityContext:
|
||||
fsGroup: 2000
|
||||
|
||||
securityContext:
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8000
|
||||
annotations: {}
|
||||
|
||||
ingress:
|
||||
enabled: false
|
||||
className: ""
|
||||
annotations: {}
|
||||
# kubernetes.io/ingress.class: nginx
|
||||
# kubernetes.io/tls-acme: "true"
|
||||
# cert-manager.io/cluster-issuer: letsencrypt-prod
|
||||
hosts:
|
||||
- host: mcp.example.com
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
tls: []
|
||||
# - secretName: nextcloud-mcp-tls
|
||||
# hosts:
|
||||
# - mcp.example.com
|
||||
|
||||
resources:
|
||||
# We recommend setting resource requests and limits
|
||||
limits:
|
||||
cpu: 1000m
|
||||
memory: 512Mi
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
|
||||
# Liveness probe configuration
|
||||
# Checks if the application process is running
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health/live
|
||||
port: http
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 30
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
|
||||
# Readiness probe configuration
|
||||
# Checks if the application is ready to serve traffic
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health/ready
|
||||
port: http
|
||||
scheme: HTTP
|
||||
initialDelaySeconds: 10
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 3
|
||||
failureThreshold: 3
|
||||
|
||||
# Autoscaling configuration
|
||||
autoscaling:
|
||||
enabled: false
|
||||
minReplicas: 1
|
||||
maxReplicas: 10
|
||||
targetCPUUtilizationPercentage: 80
|
||||
# targetMemoryUtilizationPercentage: 80
|
||||
|
||||
# Additional volumes on the output Deployment definition.
|
||||
volumes: []
|
||||
# - name: foo
|
||||
# secret:
|
||||
# secretName: mysecret
|
||||
# optional: false
|
||||
|
||||
# Additional volumeMounts on the output Deployment definition.
|
||||
volumeMounts: []
|
||||
# - name: foo
|
||||
# mountPath: "/etc/foo"
|
||||
# readOnly: true
|
||||
|
||||
nodeSelector: {}
|
||||
|
||||
tolerations: []
|
||||
|
||||
affinity: {}
|
||||
|
||||
# Init containers
|
||||
initContainers: []
|
||||
|
||||
# Additional environment variables
|
||||
extraEnv: []
|
||||
# - name: CUSTOM_VAR
|
||||
# value: "custom_value"
|
||||
|
||||
# Additional environment variables from ConfigMaps or Secrets
|
||||
extraEnvFrom: []
|
||||
# - configMapRef:
|
||||
# name: my-configmap
|
||||
# - secretRef:
|
||||
# name: my-secret
|
||||
+12
-2
@@ -21,7 +21,7 @@ services:
|
||||
restart: always
|
||||
|
||||
app:
|
||||
image: docker.io/library/nextcloud:32.0.0@sha256:f9bec5c77a8d5603354b990550a4d24487deae6e589dd20ce870e43e28460e18
|
||||
image: docker.io/library/nextcloud:32.0.1@sha256:1e4eae55eebe094cae6f9e7b6e0b4bccf4a4fe7b7e6f6f8f57010994b3b2ee42
|
||||
restart: always
|
||||
ports:
|
||||
- 0.0.0.0:8080:80
|
||||
@@ -45,12 +45,22 @@ services:
|
||||
- REDIS_HOST=redis
|
||||
|
||||
recipes:
|
||||
image: docker.io/library/nginx:alpine@sha256:61e01287e546aac28a3f56839c136b31f590273f3b41187a36f46f6a03bbfe22
|
||||
image: docker.io/library/nginx:alpine@sha256:b3c656d55d7ad751196f21b7fd2e8d4da9cb430e32f646adcf92441b72f82b14
|
||||
restart: always
|
||||
volumes:
|
||||
- ./tests/fixtures/test_recipe.html:/usr/share/nginx/html/test_recipe.html:ro
|
||||
- ./tests/fixtures/nginx.conf:/etc/nginx/nginx.conf:ro
|
||||
|
||||
unstructured:
|
||||
image: downloads.unstructured.io/unstructured-io/unstructured-api:latest@sha256:a43ab55898599157fb0e0e097dabb8ecdd1d8e3df1ae5b67c6e15a136b171a6c
|
||||
restart: always
|
||||
ports:
|
||||
- 127.0.0.1:8002:8000
|
||||
# Unstructured API runs on port 8000 internally
|
||||
# We expose it on 8002 externally to avoid conflict
|
||||
profiles:
|
||||
- unstructured
|
||||
|
||||
mcp:
|
||||
build: .
|
||||
command: ["--transport", "streamable-http"]
|
||||
|
||||
@@ -0,0 +1,795 @@
|
||||
# ADR-002: Vector Database Background Sync Authentication
|
||||
|
||||
## Status
|
||||
Proposed
|
||||
|
||||
## Context
|
||||
|
||||
To enable semantic search capabilities, the MCP server needs to index user content (notes, files, calendar events) into a vector database. This requires a background sync worker that:
|
||||
|
||||
1. **Runs independently** of user requests (periodic or continuous operation)
|
||||
2. **Accesses multiple users' content** to build a comprehensive search index
|
||||
3. **Respects user permissions** - only index content users have access to
|
||||
4. **Operates in OAuth mode** - where the MCP server doesn't have traditional admin credentials
|
||||
|
||||
### Current OAuth Architecture
|
||||
|
||||
The MCP server currently operates in two authentication modes:
|
||||
|
||||
1. **BasicAuth Mode**: Uses username/password credentials (typically admin account)
|
||||
2. **OAuth Mode**: Single OAuth client, multiple user tokens
|
||||
- Users authenticate via OAuth flow
|
||||
- Each request includes user's access token
|
||||
- Server creates per-request `NextcloudClient` with user's bearer token
|
||||
- No tokens are stored server-side
|
||||
|
||||
### The Challenge
|
||||
|
||||
Background workers need long-lived authentication to:
|
||||
- Index content continuously/periodically
|
||||
- Process multiple users' data in batch operations
|
||||
- Operate when users are not actively making requests
|
||||
|
||||
However, in OAuth mode:
|
||||
- User access tokens are ephemeral (exist only during request)
|
||||
- MCP server doesn't store user credentials
|
||||
- Admin credentials defeat the purpose of OAuth
|
||||
|
||||
We need an OAuth-native solution that maintains security while enabling background operations.
|
||||
|
||||
## Decision
|
||||
|
||||
We will implement a **tiered authentication strategy** that leverages OAuth standards with graceful fallback:
|
||||
|
||||
### Primary Strategy: OAuth-Based Authentication
|
||||
|
||||
**Tier 1: Offline Access with Refresh Tokens** (Preferred)
|
||||
- Request `offline_access` scope during OAuth client registration
|
||||
- Receive and securely store user refresh tokens
|
||||
- Background worker exchanges refresh tokens for access tokens as needed
|
||||
- Respects per-user permissions and provides full audit trail
|
||||
|
||||
**Tier 2: Token Exchange (RFC 8693)** (If supported)
|
||||
- Service account exchanges its token for user-scoped tokens on-demand
|
||||
- No token storage required
|
||||
- Only available if OIDC provider implements RFC 8693
|
||||
|
||||
### Fallback Strategy: Admin Credentials
|
||||
|
||||
**Tier 3: Admin BasicAuth** (Development/Simple Deployments)
|
||||
- Dedicated sync account with read-only permissions
|
||||
- Clear documentation of security implications
|
||||
- Recommended only for trusted environments
|
||||
|
||||
### Key Architectural Principles
|
||||
|
||||
1. **Capability Detection**: Automatically detect which OAuth methods are supported
|
||||
2. **Dual-Phase Authorization**:
|
||||
- Sync worker indexes with service credentials
|
||||
- User requests verify access with user's OAuth token
|
||||
3. **Defense in Depth**: Vector database is search accelerator, not security boundary
|
||||
4. **Separation of Concerns**: Sync credentials ≠ Request credentials
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### 1. Offline Access Flow (Tier 1)
|
||||
|
||||
#### 1.1 Client Registration
|
||||
```python
|
||||
# During OAuth client registration
|
||||
client_metadata = {
|
||||
"client_name": "Nextcloud MCP Server",
|
||||
"redirect_uris": ["http://localhost:8000/oauth/callback"],
|
||||
"grant_types": ["authorization_code", "refresh_token"],
|
||||
"scope": "openid profile email offline_access notes:read files:read ...",
|
||||
"token_type": "Bearer" # or "jwt"
|
||||
}
|
||||
```
|
||||
|
||||
#### 1.2 Token Storage
|
||||
```python
|
||||
# Encrypted token storage
|
||||
class RefreshTokenStorage:
|
||||
"""Securely store and manage user refresh tokens"""
|
||||
|
||||
def __init__(self, db_path: str, encryption_key: bytes):
|
||||
self.db = Database(db_path)
|
||||
self.cipher = Fernet(encryption_key)
|
||||
|
||||
async def store_refresh_token(
|
||||
self,
|
||||
user_id: str,
|
||||
refresh_token: str,
|
||||
expires_at: int | None = None
|
||||
):
|
||||
"""Store encrypted refresh token for user"""
|
||||
encrypted_token = self.cipher.encrypt(refresh_token.encode())
|
||||
await self.db.execute(
|
||||
"INSERT OR REPLACE INTO refresh_tokens VALUES (?, ?, ?, ?)",
|
||||
(user_id, encrypted_token, expires_at, int(time.time()))
|
||||
)
|
||||
|
||||
async def get_refresh_token(self, user_id: str) -> str | None:
|
||||
"""Retrieve and decrypt refresh token"""
|
||||
row = await self.db.fetch_one(
|
||||
"SELECT encrypted_token FROM refresh_tokens WHERE user_id = ?",
|
||||
(user_id,)
|
||||
)
|
||||
if row:
|
||||
return self.cipher.decrypt(row[0]).decode()
|
||||
return None
|
||||
```
|
||||
|
||||
#### 1.3 Token Refresh Flow
|
||||
```python
|
||||
async def get_user_access_token(user_id: str) -> str:
|
||||
"""Exchange refresh token for fresh access token"""
|
||||
|
||||
# Retrieve stored refresh token
|
||||
refresh_token = await token_storage.get_refresh_token(user_id)
|
||||
if not refresh_token:
|
||||
raise ValueError(f"No refresh token for user {user_id}")
|
||||
|
||||
# Exchange for access token
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
token_endpoint,
|
||||
data={
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token
|
||||
},
|
||||
auth=(client_id, client_secret)
|
||||
)
|
||||
response.raise_for_status()
|
||||
token_data = response.json()
|
||||
|
||||
# Store new refresh token if rotated
|
||||
if "refresh_token" in token_data:
|
||||
await token_storage.store_refresh_token(
|
||||
user_id,
|
||||
token_data["refresh_token"],
|
||||
token_data.get("refresh_expires_in")
|
||||
)
|
||||
|
||||
return token_data["access_token"]
|
||||
```
|
||||
|
||||
#### 1.4 Capturing Refresh Tokens
|
||||
|
||||
**Challenge**: MCP protocol doesn't expose refresh tokens to server
|
||||
|
||||
**Solution**: Intercept OAuth callback
|
||||
```python
|
||||
# Add route to MCP server
|
||||
@app.route("/oauth/callback")
|
||||
async def oauth_callback(request):
|
||||
"""Capture OAuth callback and store refresh token"""
|
||||
|
||||
code = request.query_params.get("code")
|
||||
state = request.query_params.get("state")
|
||||
|
||||
# Exchange authorization code for tokens
|
||||
token_response = await exchange_authorization_code(code)
|
||||
|
||||
# Extract user info
|
||||
userinfo = await get_userinfo(token_response["access_token"])
|
||||
user_id = userinfo["sub"]
|
||||
|
||||
# Store refresh token (if present)
|
||||
if "refresh_token" in token_response:
|
||||
await token_storage.store_refresh_token(
|
||||
user_id,
|
||||
token_response["refresh_token"],
|
||||
expires_at=token_response.get("refresh_expires_in")
|
||||
)
|
||||
logger.info(f"Stored refresh token for user: {user_id}")
|
||||
|
||||
# Continue MCP OAuth flow
|
||||
return redirect_to_mcp_client(state, token_response)
|
||||
```
|
||||
|
||||
### 2. Token Exchange Flow (Tier 2)
|
||||
|
||||
#### 2.1 Capability Detection
|
||||
```python
|
||||
async def check_token_exchange_support(discovery_url: str) -> bool:
|
||||
"""Check if OIDC provider supports RFC 8693 token exchange"""
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(discovery_url)
|
||||
discovery = response.json()
|
||||
|
||||
# Check for token exchange grant type
|
||||
grant_types = discovery.get("grant_types_supported", [])
|
||||
return "urn:ietf:params:oauth:grant-type:token-exchange" in grant_types
|
||||
```
|
||||
|
||||
#### 2.2 Token Exchange Implementation
|
||||
```python
|
||||
async def exchange_for_user_token(
|
||||
service_token: str,
|
||||
user_id: str,
|
||||
scopes: list[str]
|
||||
) -> str:
|
||||
"""Exchange service token for user-scoped token"""
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
token_endpoint,
|
||||
data={
|
||||
"grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
|
||||
"subject_token": service_token,
|
||||
"subject_token_type": "urn:ietf:params:oauth:token-type:access_token",
|
||||
"requested_token_type": "urn:ietf:params:oauth:token-type:access_token",
|
||||
"resource": f"user:{user_id}",
|
||||
"scope": " ".join(scopes)
|
||||
},
|
||||
auth=(client_id, client_secret)
|
||||
)
|
||||
|
||||
if response.status_code != 200:
|
||||
logger.warning(f"Token exchange failed: {response.status_code}")
|
||||
raise TokenExchangeNotSupportedError()
|
||||
|
||||
return response.json()["access_token"]
|
||||
```
|
||||
|
||||
#### 2.3 Service Account Token
|
||||
```python
|
||||
async def get_service_token() -> str:
|
||||
"""Get token for MCP server's service account"""
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
token_endpoint,
|
||||
data={
|
||||
"grant_type": "client_credentials",
|
||||
"scope": "notes:read files:read calendar:read"
|
||||
},
|
||||
auth=(client_id, client_secret)
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()["access_token"]
|
||||
```
|
||||
|
||||
### 3. Sync Worker with Tiered Authentication
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/sync_worker.py
|
||||
class VectorSyncWorker:
|
||||
"""Background worker for indexing content into vector database"""
|
||||
|
||||
def __init__(self):
|
||||
self.auth_method = None
|
||||
self.token_storage = None
|
||||
self.vector_service = None
|
||||
|
||||
async def initialize(self):
|
||||
"""Detect and configure authentication method"""
|
||||
|
||||
# Try Tier 1: Offline Access
|
||||
if os.getenv("ENABLE_OFFLINE_ACCESS") == "true":
|
||||
try:
|
||||
encryption_key = os.getenv("TOKEN_ENCRYPTION_KEY")
|
||||
self.token_storage = RefreshTokenStorage(
|
||||
db_path="tokens.db",
|
||||
encryption_key=base64.b64decode(encryption_key)
|
||||
)
|
||||
self.auth_method = "offline_access"
|
||||
logger.info("✓ Using offline_access authentication")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Offline access unavailable: {e}")
|
||||
|
||||
# Try Tier 2: Token Exchange
|
||||
try:
|
||||
if await check_token_exchange_support(discovery_url):
|
||||
self.auth_method = "token_exchange"
|
||||
logger.info("✓ Using token exchange authentication (RFC 8693)")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Token exchange unavailable: {e}")
|
||||
|
||||
# Fallback: Admin Credentials
|
||||
if os.getenv("NEXTCLOUD_USERNAME") and os.getenv("NEXTCLOUD_PASSWORD"):
|
||||
self.auth_method = "admin_basic"
|
||||
logger.warning(
|
||||
"⚠ Using admin BasicAuth authentication. "
|
||||
"Consider enabling offline_access for production."
|
||||
)
|
||||
return
|
||||
|
||||
raise RuntimeError("No authentication method available for sync worker")
|
||||
|
||||
async def get_user_client(self, user_id: str) -> NextcloudClient:
|
||||
"""Get authenticated client for user based on auth method"""
|
||||
|
||||
if self.auth_method == "offline_access":
|
||||
# Exchange refresh token for access token
|
||||
access_token = await get_user_access_token(user_id)
|
||||
return NextcloudClient.from_token(
|
||||
base_url=nextcloud_host,
|
||||
token=access_token,
|
||||
username=user_id
|
||||
)
|
||||
|
||||
elif self.auth_method == "token_exchange":
|
||||
# Get service token and exchange for user token
|
||||
service_token = await get_service_token()
|
||||
user_token = await exchange_for_user_token(
|
||||
service_token,
|
||||
user_id,
|
||||
scopes=["notes:read", "files:read"]
|
||||
)
|
||||
return NextcloudClient.from_token(
|
||||
base_url=nextcloud_host,
|
||||
token=user_token,
|
||||
username=user_id
|
||||
)
|
||||
|
||||
elif self.auth_method == "admin_basic":
|
||||
# Use admin credentials (fallback)
|
||||
return NextcloudClient.from_env()
|
||||
|
||||
raise RuntimeError(f"Unknown auth method: {self.auth_method}")
|
||||
|
||||
async def sync_user_content(self, user_id: str):
|
||||
"""Index a user's content into vector database"""
|
||||
|
||||
try:
|
||||
# Get authenticated client for this user
|
||||
client = await self.get_user_client(user_id)
|
||||
|
||||
# Sync notes
|
||||
notes = await client.notes.list_notes()
|
||||
for note in notes:
|
||||
embedding = await self.vector_service.embed(note.content)
|
||||
await self.vector_service.upsert(
|
||||
collection="nextcloud_content",
|
||||
id=f"note_{note.id}",
|
||||
vector=embedding,
|
||||
metadata={
|
||||
"user_id": user_id,
|
||||
"content_type": "note",
|
||||
"note_id": note.id,
|
||||
"title": note.title,
|
||||
"category": note.category
|
||||
}
|
||||
)
|
||||
|
||||
logger.info(f"Synced {len(notes)} notes for user: {user_id}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to sync user {user_id}: {e}")
|
||||
|
||||
async def run(self):
|
||||
"""Main sync loop"""
|
||||
|
||||
await self.initialize()
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Get list of users to sync
|
||||
if self.auth_method == "admin_basic":
|
||||
# Admin can list all users
|
||||
admin_client = NextcloudClient.from_env()
|
||||
users = await admin_client.users.list_users()
|
||||
user_ids = [u.id for u in users]
|
||||
else:
|
||||
# OAuth methods: only sync users with stored tokens
|
||||
user_ids = await self.token_storage.get_all_user_ids()
|
||||
|
||||
logger.info(f"Syncing content for {len(user_ids)} users")
|
||||
|
||||
for user_id in user_ids:
|
||||
await self.sync_user_content(user_id)
|
||||
|
||||
logger.info("Sync complete, sleeping...")
|
||||
await asyncio.sleep(300) # 5 minutes
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Sync failed: {e}")
|
||||
await asyncio.sleep(60) # Retry after 1 minute
|
||||
```
|
||||
|
||||
### 4. User Request Verification (Dual-Phase Authorization)
|
||||
|
||||
```python
|
||||
@mcp.tool()
|
||||
@require_scopes("notes:read")
|
||||
async def nc_notes_semantic_search(
|
||||
query: str,
|
||||
ctx: Context,
|
||||
limit: int = 10
|
||||
) -> SemanticSearchResponse:
|
||||
"""Semantic search with permission verification"""
|
||||
|
||||
# Get user's OAuth client (uses their access token from request)
|
||||
user_client = get_client(ctx)
|
||||
username = user_client.username
|
||||
|
||||
# Phase 1: Vector search (fast, may include false positives)
|
||||
embedding = await vector_service.embed(query)
|
||||
candidate_results = await qdrant.search(
|
||||
collection_name="nextcloud_content",
|
||||
query_vector=embedding,
|
||||
query_filter={
|
||||
"must": [
|
||||
{
|
||||
"should": [
|
||||
{"key": "user_id", "match": {"value": username}},
|
||||
{"key": "shared_with", "match": {"any": [username]}}
|
||||
]
|
||||
},
|
||||
{"key": "content_type", "match": {"value": "note"}}
|
||||
]
|
||||
},
|
||||
limit=limit * 2 # Get extra candidates
|
||||
)
|
||||
|
||||
# Phase 2: Verify access via Nextcloud API (authoritative)
|
||||
verified_results = []
|
||||
for candidate in candidate_results:
|
||||
note_id = candidate.payload["note_id"]
|
||||
try:
|
||||
# This uses user's OAuth token - will fail if no access
|
||||
note = await user_client.notes.get_note(note_id)
|
||||
verified_results.append({
|
||||
"note": note,
|
||||
"score": candidate.score
|
||||
})
|
||||
if len(verified_results) >= limit:
|
||||
break
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code == 403:
|
||||
# User doesn't have access - skip silently
|
||||
logger.debug(f"Filtered out note {note_id} for {username}")
|
||||
continue
|
||||
raise
|
||||
|
||||
return SemanticSearchResponse(results=verified_results)
|
||||
```
|
||||
|
||||
### 5. Security Implementation
|
||||
|
||||
#### 5.1 Token Encryption
|
||||
```python
|
||||
# Generate encryption key (store securely)
|
||||
from cryptography.fernet import Fernet
|
||||
|
||||
# On first setup
|
||||
encryption_key = Fernet.generate_key()
|
||||
# Store in environment or secrets manager
|
||||
# NEVER commit to source control
|
||||
|
||||
# In production
|
||||
encryption_key = os.getenv("TOKEN_ENCRYPTION_KEY") # Base64-encoded Fernet key
|
||||
```
|
||||
|
||||
#### 5.2 Token Rotation
|
||||
```python
|
||||
async def rotate_refresh_token(user_id: str):
|
||||
"""Handle refresh token rotation"""
|
||||
|
||||
old_refresh_token = await token_storage.get_refresh_token(user_id)
|
||||
|
||||
# Exchange for new tokens
|
||||
response = await exchange_refresh_token(old_refresh_token)
|
||||
|
||||
if "refresh_token" in response:
|
||||
# Store new refresh token
|
||||
await token_storage.store_refresh_token(
|
||||
user_id,
|
||||
response["refresh_token"],
|
||||
expires_at=response.get("refresh_expires_in")
|
||||
)
|
||||
|
||||
# Securely delete old token
|
||||
await token_storage.delete_refresh_token(user_id, old_refresh_token)
|
||||
```
|
||||
|
||||
#### 5.3 Audit Logging
|
||||
```python
|
||||
async def audit_log(
|
||||
event: str,
|
||||
user_id: str,
|
||||
resource_type: str,
|
||||
resource_id: str,
|
||||
auth_method: str
|
||||
):
|
||||
"""Log sync operations for audit trail"""
|
||||
|
||||
await audit_db.execute(
|
||||
"INSERT INTO audit_logs VALUES (?, ?, ?, ?, ?, ?, ?)",
|
||||
(
|
||||
int(time.time()),
|
||||
event, # "index_note", "index_file"
|
||||
user_id,
|
||||
resource_type,
|
||||
resource_id,
|
||||
auth_method,
|
||||
socket.gethostname()
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### 6. Configuration
|
||||
|
||||
#### 6.1 Environment Variables
|
||||
```bash
|
||||
# Tier 1: Offline Access
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
TOKEN_ENCRYPTION_KEY=<base64-encoded-fernet-key>
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
# Tier 2: Token Exchange (auto-detected)
|
||||
# No configuration needed - detected via OIDC discovery
|
||||
|
||||
# Tier 3: Admin Fallback
|
||||
NEXTCLOUD_USERNAME=sync-bot
|
||||
NEXTCLOUD_PASSWORD=<secure-password>
|
||||
|
||||
# Vector Database
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
QDRANT_API_KEY=<api-key>
|
||||
|
||||
# Sync Configuration
|
||||
SYNC_INTERVAL_SECONDS=300
|
||||
SYNC_BATCH_SIZE=100
|
||||
```
|
||||
|
||||
#### 6.2 Docker Compose
|
||||
```yaml
|
||||
services:
|
||||
mcp-sync:
|
||||
build: .
|
||||
command: ["python", "-m", "nextcloud_mcp_server.sync_worker"]
|
||||
environment:
|
||||
- NEXTCLOUD_HOST=http://app:80
|
||||
- ENABLE_OFFLINE_ACCESS=true
|
||||
- TOKEN_ENCRYPTION_KEY=${TOKEN_ENCRYPTION_KEY}
|
||||
- QDRANT_URL=http://qdrant:6333
|
||||
# OAuth client credentials (for token refresh)
|
||||
- NEXTCLOUD_OIDC_CLIENT_ID=${NEXTCLOUD_OIDC_CLIENT_ID}
|
||||
- NEXTCLOUD_OIDC_CLIENT_SECRET=${NEXTCLOUD_OIDC_CLIENT_SECRET}
|
||||
volumes:
|
||||
- sync-tokens:/app/data
|
||||
depends_on:
|
||||
- app
|
||||
- qdrant
|
||||
|
||||
volumes:
|
||||
sync-tokens: # Persistent storage for encrypted tokens
|
||||
```
|
||||
|
||||
## Consequences
|
||||
|
||||
### Benefits
|
||||
|
||||
1. **OAuth-Native Authentication**
|
||||
- Leverages standard OAuth flows (offline_access, token exchange)
|
||||
- No reliance on admin passwords in production
|
||||
- Compatible with enterprise OIDC providers
|
||||
|
||||
2. **User-Level Permissions**
|
||||
- Each user's content indexed with their own credentials
|
||||
- Respects sharing, permissions, and access controls
|
||||
- Full audit trail of which user's token was used
|
||||
|
||||
3. **Security**
|
||||
- Tokens encrypted at rest
|
||||
- Short-lived access tokens (refreshed as needed)
|
||||
- Token rotation support
|
||||
- Defense in depth with dual-phase authorization
|
||||
|
||||
4. **Flexibility**
|
||||
- Automatic capability detection
|
||||
- Graceful degradation through authentication tiers
|
||||
- Works with varying OIDC provider capabilities
|
||||
|
||||
5. **Operational**
|
||||
- Background sync independent of user activity
|
||||
- Efficient batch processing
|
||||
- Clear separation of sync vs request credentials
|
||||
|
||||
### Limitations
|
||||
|
||||
1. **Complexity**
|
||||
- Multiple authentication paths to maintain
|
||||
- Token storage and encryption infrastructure
|
||||
- More moving parts than simple admin auth
|
||||
|
||||
2. **User Experience**
|
||||
- `offline_access` scope may require additional consent
|
||||
- Users must authenticate at least once for indexing
|
||||
- New users not automatically indexed
|
||||
|
||||
3. **OIDC Provider Dependency**
|
||||
- Token exchange requires RFC 8693 support (rare)
|
||||
- Refresh token rotation varies by provider
|
||||
- Some providers may not support offline_access
|
||||
|
||||
4. **Operational Overhead**
|
||||
- Token database maintenance
|
||||
- Monitoring token expiration
|
||||
- Handling revoked tokens gracefully
|
||||
|
||||
### Security Considerations
|
||||
|
||||
#### Threat Model
|
||||
|
||||
**Threat 1: Token Storage Breach**
|
||||
- **Mitigation**: Encryption at rest using Fernet
|
||||
- **Mitigation**: Secure key management (secrets manager)
|
||||
- **Mitigation**: Minimal token lifetime
|
||||
- **Detection**: Audit logs for unusual access patterns
|
||||
|
||||
**Threat 2: Token Replay**
|
||||
- **Mitigation**: Short-lived access tokens (refreshed frequently)
|
||||
- **Mitigation**: Token rotation on each refresh
|
||||
- **Mitigation**: Revocation support
|
||||
|
||||
**Threat 3: Privilege Escalation**
|
||||
- **Mitigation**: Dual-phase authorization (vector DB + Nextcloud API)
|
||||
- **Mitigation**: Sync worker uses same scopes as user requests
|
||||
- **Mitigation**: Per-user token isolation
|
||||
|
||||
**Threat 4: Vector Database Poisoning**
|
||||
- **Mitigation**: User requests always verify via Nextcloud API
|
||||
- **Mitigation**: Vector DB is cache/accelerator, not source of truth
|
||||
- **Mitigation**: Sync operations audited per user
|
||||
|
||||
#### Security Best Practices
|
||||
|
||||
1. **Token Encryption Key Management**
|
||||
```bash
|
||||
# Generate secure key
|
||||
python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
|
||||
# Store in secrets manager (Vault, AWS Secrets Manager, etc.)
|
||||
# Or use environment variable with restricted permissions
|
||||
```
|
||||
|
||||
2. **Token Storage Permissions**
|
||||
```bash
|
||||
# Restrict database file permissions
|
||||
chmod 600 /app/data/tokens.db
|
||||
chown mcp-server:mcp-server /app/data/tokens.db
|
||||
```
|
||||
|
||||
3. **Token Rotation Schedule**
|
||||
- Refresh access tokens every 5 minutes (or token expiry)
|
||||
- Rotate refresh tokens on each use (if provider supports)
|
||||
- Revoke tokens on user logout/deauthorization
|
||||
|
||||
4. **Monitoring and Alerting**
|
||||
- Alert on token refresh failures
|
||||
- Monitor for unusual access patterns
|
||||
- Track token age and rotation
|
||||
- Audit sync operations per user
|
||||
|
||||
### Future Enhancements
|
||||
|
||||
1. **Token Revocation Handling**
|
||||
- Webhook endpoint for token revocation events
|
||||
- Periodic validation of stored tokens
|
||||
- Graceful handling of revoked tokens
|
||||
|
||||
2. **Selective Sync**
|
||||
- Allow users to opt-in/opt-out of indexing
|
||||
- Per-content-type sync preferences
|
||||
- Privacy controls for sensitive content
|
||||
|
||||
3. **Multi-Tenant Token Storage**
|
||||
- Separate token databases per tenant
|
||||
- Key rotation per tenant
|
||||
- Tenant isolation
|
||||
|
||||
4. **Token Lifecycle Management**
|
||||
- Automatic cleanup of expired tokens
|
||||
- Token usage analytics
|
||||
- Token health dashboard
|
||||
|
||||
5. **Alternative OAuth Flows**
|
||||
- Device flow for headless sync
|
||||
- Resource owner password credentials (ROPC) as fallback
|
||||
- SAML assertion grants
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### Alternative 1: Admin BasicAuth Only
|
||||
|
||||
**Approach**: Background worker always uses admin credentials
|
||||
|
||||
**Pros**:
|
||||
- Simple implementation
|
||||
- No token storage complexity
|
||||
- Works with any authentication backend
|
||||
|
||||
**Cons**:
|
||||
- Violates principle of least privilege
|
||||
- Single powerful credential
|
||||
- No per-user audit trail
|
||||
- Bypasses OAuth entirely
|
||||
|
||||
**Decision**: Rejected for production use; kept as fallback only
|
||||
|
||||
### Alternative 2: Client Credentials Grant Only
|
||||
|
||||
**Approach**: Service account with broad read permissions
|
||||
|
||||
**Pros**:
|
||||
- OAuth-native pattern
|
||||
- No user token storage
|
||||
- Standard OAuth flow
|
||||
|
||||
**Cons**:
|
||||
- Requires client_credentials support (may not be available)
|
||||
- Still needs broad cross-user permissions
|
||||
- Not well-suited for multi-user indexing
|
||||
|
||||
**Decision**: Rejected; token exchange is better fit for multi-user scenario
|
||||
|
||||
### Alternative 3: Per-User Access Token Storage
|
||||
|
||||
**Approach**: Store user access tokens (not refresh tokens)
|
||||
|
||||
**Pros**:
|
||||
- Simpler than refresh token flow
|
||||
- No token refresh logic needed
|
||||
|
||||
**Cons**:
|
||||
- Access tokens are short-lived (1-24 hours)
|
||||
- Requires frequent re-authentication
|
||||
- Poor user experience
|
||||
- Sync gaps when tokens expire
|
||||
|
||||
**Decision**: Rejected; refresh tokens provide better UX
|
||||
|
||||
### Alternative 4: On-Demand Indexing Only
|
||||
|
||||
**Approach**: Index content when user searches (no background worker)
|
||||
|
||||
**Pros**:
|
||||
- Uses user's request token
|
||||
- No background auth needed
|
||||
- Simpler architecture
|
||||
|
||||
**Cons**:
|
||||
- Very slow first search
|
||||
- Poor user experience
|
||||
- Incomplete index
|
||||
- Can't pre-compute embeddings
|
||||
|
||||
**Decision**: Rejected; background indexing is essential for semantic search
|
||||
|
||||
### Alternative 5: Nextcloud App Tokens
|
||||
|
||||
**Approach**: Generate app-specific passwords for each user
|
||||
|
||||
**Pros**:
|
||||
- Nextcloud-native feature
|
||||
- User-controlled revocation
|
||||
- Scoped per-application
|
||||
|
||||
**Cons**:
|
||||
- Requires user interaction to create
|
||||
- May not support programmatic creation
|
||||
- Still requires secure storage
|
||||
- Not standard OAuth
|
||||
|
||||
**Decision**: Rejected; not automatable for background worker
|
||||
|
||||
## Related Decisions
|
||||
|
||||
- ADR-001: Enhanced Note Search (establishes need for vector search)
|
||||
- [Future] ADR-003: Vector Database Selection
|
||||
- [Future] ADR-004: Embedding Model Strategy
|
||||
|
||||
## References
|
||||
|
||||
- [RFC 8693: OAuth 2.0 Token Exchange](https://datatracker.ietf.org/doc/html/rfc8693)
|
||||
- [RFC 6749: OAuth 2.0 - Refresh Tokens](https://datatracker.ietf.org/doc/html/rfc6749#section-1.5)
|
||||
- [OpenID Connect Core - Offline Access](https://openid.net/specs/openid-connect-core-1_0.html#OfflineAccess)
|
||||
- [OWASP: OAuth Security Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/OAuth2_Cheat_Sheet.html)
|
||||
- [RFC 8707: Resource Indicators for OAuth 2.0](https://datatracker.ietf.org/doc/html/rfc8707)
|
||||
File diff suppressed because it is too large
Load Diff
+542
-114
@@ -8,166 +8,463 @@ The Nextcloud MCP Server acts as an **OAuth 2.0 Resource Server**, protecting ac
|
||||
|
||||
## Architecture Diagram
|
||||
|
||||
The complete OAuth flow includes server startup (with DCR), client discovery (with PRM), authorization (with PKCE), and API access phases:
|
||||
|
||||
```
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
Phase 0: MCP Server Startup & Client Registration (DCR - RFC 7591)
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
┌──────────────────┐ ┌─────────────────┐
|
||||
│ MCP Server │ │ Nextcloud │
|
||||
│ (Resource │ │ (OIDC Provider)│
|
||||
│ Server) │ │ │
|
||||
└────────┬─────────┘ └────────┬────────┘
|
||||
│ │
|
||||
│ 0a. OIDC Discovery │
|
||||
├────────────────────────────────────>│
|
||||
│ GET │
|
||||
| /.well-known/openid-configuration │
|
||||
│ │
|
||||
│ 0b. Discovery response │
|
||||
│<────────────────────────────────────┤
|
||||
│ {issuer, endpoints, PKCE methods} │
|
||||
│ │
|
||||
│ 0c. Register OAuth client (DCR) │
|
||||
├────────────────────────────────────>│
|
||||
│ POST /apps/oidc/register │
|
||||
│ {client_name, redirect_uris, │
|
||||
│ scopes, token_type} │
|
||||
│ │
|
||||
│ 0d. Client credentials │
|
||||
│<────────────────────────────────────┤
|
||||
│ {client_id, client_secret} │
|
||||
│ → Saved to .nextcloud_oauth_*.json │
|
||||
│ │
|
||||
│ ✓ Server ready for connections │
|
||||
|
||||
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
Phase 1: Client Connection & Discovery (PRM - RFC 9728)
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
┌─────────────┐ ┌──────────────────┐ ┌─────────────────┐
|
||||
│ │ │ │ │ │
|
||||
│ MCP Client │ │ MCP Server │ │ Nextcloud │
|
||||
│ (Claude, │ │ (Resource │ │ Instance │
|
||||
│ etc.) │ │ Server) │ │ │
|
||||
│ │ │ MCP Server │ │ Nextcloud │
|
||||
│ MCP Client │ │ (Resource │ │ Instance │
|
||||
│ (Claude) │ │ Server) │ │ │
|
||||
│ │ │ │ │ │
|
||||
└──────┬──────┘ └────────┬─────────┘ └────────┬────────┘
|
||||
│ │ │
|
||||
│ │ │
|
||||
│ 1. Connect to MCP │ │
|
||||
│ 1a. Connect to MCP │ │
|
||||
├─────────────────────────────────>│ │
|
||||
│ │ │
|
||||
│ 2. Return auth settings │ │
|
||||
│ (issuer_url, scopes) │ │
|
||||
│ 1b. Return auth settings │ │
|
||||
│<─────────────────────────────────┤ │
|
||||
│ {issuer_url, resource_url} │ │
|
||||
│ │ │
|
||||
│ │ │
|
||||
│ 3. Start OAuth flow (with PKCE) │ │
|
||||
├──────────────────────────────────┼────────────────────────────────────>│
|
||||
│ │ /apps/oidc/authorize │
|
||||
│ │ │
|
||||
│ 4. User authenticates in browser│ │
|
||||
│<─────────────────────────────────┼─────────────────────────────────────┤
|
||||
│ │ │
|
||||
│ 5. Authorization code (redirect)│ │
|
||||
│<─────────────────────────────────┤ │
|
||||
│ │ │
|
||||
│ 6. Exchange code for token │ │
|
||||
├──────────────────────────────────┼────────────────────────────────────>│
|
||||
│ │ /apps/oidc/token │
|
||||
│ │ │
|
||||
│ 7. Access token │ │
|
||||
│<─────────────────────────────────┼─────────────────────────────────────┤
|
||||
│ │ │
|
||||
│ │ │
|
||||
│ 8. API request with Bearer token│ │
|
||||
│ 1c. PRM Discovery (RFC 9728) │ │
|
||||
├─────────────────────────────────>│ │
|
||||
│ Authorization: Bearer xxx │ │
|
||||
│ GET /.well-known/oauth- │ │
|
||||
│ protected-resource/mcp │ │
|
||||
│ │ │
|
||||
│ │ 9. Validate token via userinfo │
|
||||
│ ├────────────────────────────────────>│
|
||||
│ │ /apps/oidc/userinfo │
|
||||
│ │ │
|
||||
│ │ 10. User info (token valid) │
|
||||
│ │<────────────────────────────────────┤
|
||||
│ │ │
|
||||
│ │ 11. Nextcloud API request │
|
||||
│ ├────────────────────────────────────>│
|
||||
│ │ Authorization: Bearer xxx │
|
||||
│ │ (Notes, Calendar, etc.) │
|
||||
│ │ │
|
||||
│ │ 12. API response │
|
||||
│ │<────────────────────────────────────┤
|
||||
│ │ │
|
||||
│ 13. MCP tool response │ │
|
||||
│ 1d. PRM response (scopes!) │ │
|
||||
│<─────────────────────────────────┤ │
|
||||
│ {resource, scopes_supported, │ ← Dynamically discovered from │
|
||||
│ authorization_servers} │ @require_scopes decorators │
|
||||
│ │ │
|
||||
|
||||
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
Phase 2: OAuth Authorization Flow (PKCE - RFC 7636)
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
│ │ │
|
||||
│ 2a. Generate PKCE challenge │ │
|
||||
│ code_verifier = random(43-128) │ │
|
||||
│ code_challenge = SHA256(verif.) │ │
|
||||
│ │ │
|
||||
│ 2b. Authorization request │ │
|
||||
├──────────────────────────────────┼────────────────────────────────────>│
|
||||
│ /apps/oidc/authorize? │ │
|
||||
│ client_id=xxx │ │
|
||||
│ &code_challenge=abc... │ │
|
||||
│ &code_challenge_method=S256 │ │
|
||||
│ &scope=openid notes:read ... │ │
|
||||
│ │ │
|
||||
│ 2c. User consent page │ │
|
||||
│<─────────────────────────────────┼─────────────────────────────────────┤
|
||||
│ (Browser: Select scopes) │ │
|
||||
│ │ │
|
||||
│ 2d. User grants scopes │ │
|
||||
├──────────────────────────────────┼────────────────────────────────────>│
|
||||
│ │ │
|
||||
│ 2e. Authorization code redirect │ │
|
||||
│<─────────────────────────────────┼─────────────────────────────────────┤
|
||||
│ callback?code=xyz123 │ │
|
||||
│ │ │
|
||||
│ 2f. Exchange code for token │ │
|
||||
├──────────────────────────────────┼────────────────────────────────────>│
|
||||
│ POST /apps/oidc/token │ │
|
||||
│ {code, code_verifier, │ ← Validates PKCE challenge │
|
||||
│ client_id, client_secret} │ │
|
||||
│ │ │
|
||||
│ 2g. Access token (JWT/opaque) │ │
|
||||
│<─────────────────────────────────┼─────────────────────────────────────┤
|
||||
│ {access_token, token_type, │ │
|
||||
│ scope: "openid notes:read...") │ ← User's granted scopes │
|
||||
│ │ │
|
||||
|
||||
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
Phase 3: MCP Tool Access (Scope-based Authorization)
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
│ │ │
|
||||
│ 3a. list_tools request │ │
|
||||
├─────────────────────────────────>│ │
|
||||
│ Authorization: Bearer <token> │ │
|
||||
│ │ │
|
||||
│ │ 3b. Validate token │
|
||||
│ ├────────────────────────────────────>│
|
||||
│ │ GET /apps/oidc/userinfo │
|
||||
│ │ Authorization: Bearer <token> │
|
||||
│ │ │
|
||||
│ │ 3c. Token valid + scopes │
|
||||
│ │<────────────────────────────────────┤
|
||||
│ │ {sub, scopes, ...} │
|
||||
│ │ ← Cached for 1 hour │
|
||||
│ │ │
|
||||
│ 3d. Filtered tool list │ │
|
||||
│<─────────────────────────────────┤ ← Only tools matching user's │
|
||||
│ [tools matching token scopes] │ token scopes (via @require_scopes)
|
||||
│ │ │
|
||||
│ 3e. Call tool │ │
|
||||
├─────────────────────────────────>│ │
|
||||
│ nc_notes_get_note(note_id=1) │ ← @require_scopes("notes:read") │
|
||||
│ Authorization: Bearer <token> │ │
|
||||
│ │ │
|
||||
│ │ 3f. Scope check PASSED │
|
||||
│ │ ✓ Token has notes:read │
|
||||
│ │ │
|
||||
│ │ 3g. Nextcloud API call │
|
||||
│ ├────────────────────────────────────>│
|
||||
│ │ GET /apps/notes/api/v1/notes/1 │
|
||||
│ │ Authorization: Bearer <token> │
|
||||
│ │ ← user_oidc validates Bearer token │
|
||||
│ │ │
|
||||
│ │ 3h. API response │
|
||||
│ │<────────────────────────────────────┤
|
||||
│ │ {id: 1, title: "Note", ...} │
|
||||
│ │ │
|
||||
│ 3i. MCP tool response │ │
|
||||
│<─────────────────────────────────┤ │
|
||||
│ {note data} │ │
|
||||
│ │ │
|
||||
|
||||
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
Insufficient Scope Example (Step-Up Authorization)
|
||||
═══════════════════════════════════════════════════════════════════════════════════
|
||||
|
||||
│ 4a. Call write tool │ │
|
||||
├─────────────────────────────────>│ │
|
||||
│ nc_notes_create_note(...) │ ← @require_scopes("notes:write") │
|
||||
│ Authorization: Bearer <token> │ │
|
||||
│ │ │
|
||||
│ │ 4b. Scope check FAILED │
|
||||
│ │ ✗ Token only has notes:read │
|
||||
│ │ │
|
||||
│ 4c. 403 Insufficient Scope │ │
|
||||
│<─────────────────────────────────┤ │
|
||||
│ WWW-Authenticate: Bearer │ │
|
||||
│ error="insufficient_scope", │ │
|
||||
│ scope="notes:write", │ │
|
||||
│ resource_metadata="..." │ │
|
||||
│ │ │
|
||||
│ → Client can re-authorize with │ │
|
||||
│ additional scopes (Step-Up) │ │
|
||||
│ │ │
|
||||
```
|
||||
|
||||
## Components
|
||||
|
||||
### 1. MCP Client
|
||||
- Any MCP-compatible client (Claude Desktop, Claude Code, custom clients)
|
||||
### 1. MCP Client (e.g., Claude Desktop, Claude Code)
|
||||
|
||||
**Capabilities**:
|
||||
- Discovers OAuth configuration via MCP server
|
||||
- Queries PRM endpoint for supported scopes
|
||||
- Initiates OAuth flow with PKCE (Proof Key for Code Exchange)
|
||||
- Stores and sends access token with each request
|
||||
- **Example**: Claude Desktop, Claude Code
|
||||
- Handles scope-based tool filtering
|
||||
- Supports step-up authorization (re-auth for additional scopes)
|
||||
|
||||
### 2. MCP Server (Resource Server)
|
||||
- **Role**: OAuth 2.0 Resource Server
|
||||
- **Location**: This Nextcloud MCP Server implementation
|
||||
- **Responsibilities**:
|
||||
- Validates Bearer tokens by calling Nextcloud's userinfo endpoint
|
||||
- Caches validated tokens (default: 1 hour TTL)
|
||||
- Creates authenticated Nextcloud client instances per-user
|
||||
- Enforces PKCE requirements (S256 code challenge method)
|
||||
- Exposes Nextcloud functionality via MCP tools
|
||||
**Examples**: Claude Desktop, Claude Code, MCP Inspector, custom MCP clients
|
||||
|
||||
### 2. MCP Server (Resource Server - This Implementation)
|
||||
|
||||
**Role**: OAuth 2.0 Resource Server (RFC 6749)
|
||||
|
||||
**Responsibilities**:
|
||||
|
||||
#### Startup Phase
|
||||
- **OIDC Discovery**: Queries `/.well-known/openid-configuration` for OAuth endpoints
|
||||
- **PKCE Validation**: Verifies server advertises S256 code challenge method
|
||||
- **Dynamic Client Registration (DCR)**: Automatically registers OAuth client via `/apps/oidc/register` (RFC 7591)
|
||||
- Or loads pre-configured client credentials
|
||||
- Saves credentials to `.nextcloud_oauth_client.json`
|
||||
- **Tool Registration**: Loads all MCP tools with their `@require_scopes` decorators
|
||||
|
||||
#### Client Connection Phase
|
||||
- **Auth Settings**: Returns OAuth issuer URL and resource URL
|
||||
- **PRM Endpoint**: Exposes `/.well-known/oauth-protected-resource/mcp` (RFC 9728)
|
||||
- Dynamically discovers scopes from all registered tools
|
||||
- Returns `scopes_supported` list based on `@require_scopes` decorators
|
||||
|
||||
#### Request Processing Phase
|
||||
- **Token Validation**: Validates Bearer tokens via Nextcloud userinfo endpoint
|
||||
- Supports both JWT and opaque tokens
|
||||
- Caches validation results (1-hour TTL)
|
||||
- Extracts user identity and granted scopes
|
||||
- **Scope Enforcement**:
|
||||
- Filters `list_tools` based on user's token scopes
|
||||
- Validates scopes before executing each tool
|
||||
- Returns 403 with `WWW-Authenticate` header for insufficient scopes
|
||||
- **Per-User Clients**: Creates authenticated `NextcloudClient` instance per user
|
||||
- Uses Bearer token for all Nextcloud API requests
|
||||
- User-specific permissions and audit trails
|
||||
|
||||
**Key Files**:
|
||||
- [`app.py`](../nextcloud_mcp_server/app.py) - OAuth mode detection and configuration
|
||||
- [`auth/token_verifier.py`](../nextcloud_mcp_server/auth/token_verifier.py) - Token validation logic
|
||||
- [`app.py`](../nextcloud_mcp_server/app.py) - OAuth mode, DCR, PRM endpoint
|
||||
- [`auth/token_verifier.py`](../nextcloud_mcp_server/auth/token_verifier.py) - Token validation (userinfo + introspection + JWT)
|
||||
- [`auth/context_helper.py`](../nextcloud_mcp_server/auth/context_helper.py) - Per-user client creation
|
||||
- [`auth/scope_authorization.py`](../nextcloud_mcp_server/auth/scope_authorization.py) - `@require_scopes` decorator, scope discovery
|
||||
- [`auth/client_registration.py`](../nextcloud_mcp_server/auth/client_registration.py) - DCR implementation (RFC 7591)
|
||||
|
||||
### 3. Nextcloud OIDC Apps
|
||||
|
||||
#### a) `oidc` - OIDC Identity Provider
|
||||
- **Role**: OAuth 2.0 Authorization Server
|
||||
- **Location**: Nextcloud app (`apps/oidc`)
|
||||
- **Endpoints**:
|
||||
- `/.well-known/openid-configuration` - Discovery endpoint
|
||||
- `/apps/oidc/authorize` - Authorization endpoint
|
||||
- `/apps/oidc/token` - Token endpoint
|
||||
- `/apps/oidc/userinfo` - User info endpoint (token validation)
|
||||
- `/apps/oidc/jwks` - JSON Web Key Set
|
||||
- `/apps/oidc/register` - Dynamic client registration
|
||||
|
||||
**Role**: OAuth 2.0 Authorization Server + OIDC Provider
|
||||
|
||||
**Location**: Nextcloud app (`apps/oidc`)
|
||||
|
||||
**Endpoints**:
|
||||
- `/.well-known/openid-configuration` - OIDC Discovery (RFC 8414)
|
||||
- `/apps/oidc/authorize` - Authorization endpoint (OAuth 2.0 + PKCE)
|
||||
- `/apps/oidc/token` - Token endpoint (issues JWT or opaque tokens)
|
||||
- `/apps/oidc/userinfo` - UserInfo endpoint (OIDC Core, used for token validation)
|
||||
- `/apps/oidc/jwks` - JSON Web Key Set (for JWT signature verification)
|
||||
- `/apps/oidc/register` - Dynamic Client Registration endpoint (RFC 7591)
|
||||
- `/apps/oidc/introspect` - Token Introspection endpoint (RFC 7662, optional)
|
||||
|
||||
**Token Types**:
|
||||
- **JWT tokens**: Self-contained tokens with embedded scopes, validated via JWKS or userinfo
|
||||
- **Opaque tokens**: Random strings, validated via userinfo or introspection endpoint
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
# Enable dynamic client registration (optional)
|
||||
# Settings → OIDC → "Allow dynamic client registration"
|
||||
# Enable dynamic client registration (recommended for development)
|
||||
# Nextcloud Admin → Settings → OIDC → "Allow dynamic client registration"
|
||||
|
||||
# Enable token introspection (optional, for opaque token validation)
|
||||
# Nextcloud Admin → Settings → OIDC → "Enable token introspection"
|
||||
```
|
||||
|
||||
#### b) `user_oidc` - OpenID Connect User Backend
|
||||
- **Role**: Bearer token validation middleware
|
||||
- **Location**: Nextcloud app (`apps/user_oidc`)
|
||||
- **Responsibilities**:
|
||||
- Validates Bearer tokens for Nextcloud API requests
|
||||
- Creates user sessions from valid Bearer tokens
|
||||
- Integrates with Nextcloud's authentication system
|
||||
|
||||
**Role**: Bearer token validation middleware for Nextcloud APIs
|
||||
|
||||
**Location**: Nextcloud app (`apps/user_oidc`)
|
||||
|
||||
**Responsibilities**:
|
||||
- Intercepts Nextcloud API requests with `Authorization: Bearer` header
|
||||
- Validates tokens against OIDC provider (`oidc` app)
|
||||
- Creates authenticated user sessions
|
||||
- Enforces user-specific permissions on API requests
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
# Enable Bearer token validation (required)
|
||||
# Enable Bearer token validation (required for OAuth mode)
|
||||
php occ config:system:set user_oidc oidc_provider_bearer_validation --value=true --type=boolean
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
> The `user_oidc` app requires a patch to properly support Bearer token authentication for non-OCS endpoints. See [Upstream Status](oauth-upstream-status.md) for details.
|
||||
> The `user_oidc` app requires a patch to properly support Bearer token authentication for non-OCS endpoints (like Notes API, Calendar API). See [Upstream Status](oauth-upstream-status.md) for patch details and PR status.
|
||||
|
||||
### 4. Nextcloud Instance
|
||||
- **Role**: Resource Owner / API Provider
|
||||
- **Provides**: Notes, Calendar, Contacts, Deck, Files, etc.
|
||||
|
||||
**Role**: Resource Owner + API Provider
|
||||
|
||||
**APIs Exposed**:
|
||||
- **Notes API**: `/apps/notes/api/v1/` - Note CRUD operations
|
||||
- **Calendar (CalDAV)**: `/remote.php/dav/calendars/` - Events and todos
|
||||
- **Contacts (CardDAV)**: `/remote.php/dav/addressbooks/` - Contact management
|
||||
- **Cookbook API**: `/apps/cookbook/api/v1/` - Recipe management
|
||||
- **Deck API**: `/apps/deck/api/v1.0/` - Kanban boards
|
||||
- **Tables API**: `/apps/tables/api/2/` - Table row operations
|
||||
- **WebDAV (Files)**: `/remote.php/dav/files/` - File operations
|
||||
- **Sharing API**: `/ocs/v2.php/apps/files_sharing/api/v1/` - Share management
|
||||
|
||||
## Authentication Flow
|
||||
|
||||
### Phase 1: OAuth Authorization (Steps 1-7)
|
||||
The OAuth flow consists of four distinct phases (see diagram above for visual representation):
|
||||
|
||||
1. **Client Connects**: MCP client connects to MCP server
|
||||
2. **Auth Settings**: MCP server returns OAuth settings:
|
||||
```json
|
||||
{
|
||||
"issuer_url": "https://nextcloud.example.com",
|
||||
"resource_server_url": "http://localhost:8000",
|
||||
"required_scopes": ["openid", "profile"]
|
||||
}
|
||||
```
|
||||
3. **OAuth Flow**: Client initiates OAuth flow with PKCE
|
||||
- Generates `code_verifier` (random string)
|
||||
- Calculates `code_challenge` = SHA256(code_verifier)
|
||||
- Redirects user to `/apps/oidc/authorize` with `code_challenge`
|
||||
4. **User Authentication**: User logs in to Nextcloud via browser
|
||||
5. **Authorization Code**: Nextcloud redirects back with authorization code
|
||||
6. **Token Exchange**: Client exchanges code for access token
|
||||
- Sends `code` + `code_verifier` to `/apps/oidc/token`
|
||||
- OIDC app validates PKCE challenge
|
||||
7. **Access Token**: Client receives access token (JWT or opaque)
|
||||
### Phase 0: MCP Server Startup (One-time Setup)
|
||||
|
||||
### Phase 2: API Access (Steps 8-13)
|
||||
**Happens**: On MCP server first startup
|
||||
|
||||
8. **API Request**: Client sends MCP request with Bearer token
|
||||
9. **Token Validation**: MCP server validates token:
|
||||
- Checks cache (1-hour TTL by default)
|
||||
- If not cached, calls `/apps/oidc/userinfo` with Bearer token
|
||||
- Extracts username from `sub` or `preferred_username` claim
|
||||
10. **User Info**: Nextcloud returns user info if token is valid
|
||||
11. **Nextcloud API Call**: MCP server calls Nextcloud API on behalf of user
|
||||
- Creates `NextcloudClient` instance with Bearer token
|
||||
- User-specific permissions apply
|
||||
12. **API Response**: Nextcloud returns data
|
||||
13. **MCP Response**: MCP server returns formatted response to client
|
||||
**Steps**:
|
||||
1. **OIDC Discovery** (`GET /.well-known/openid-configuration`)
|
||||
- MCP server queries Nextcloud for OAuth endpoints
|
||||
- Validates PKCE support (requires `S256` code challenge method)
|
||||
- Extracts endpoints: authorize, token, userinfo, jwks, register
|
||||
|
||||
2. **Dynamic Client Registration** (`POST /apps/oidc/register`)
|
||||
- If no pre-configured client credentials exist
|
||||
- MCP server registers itself as OAuth client (RFC 7591)
|
||||
- Provides: client name, redirect URIs, requested scopes, token type
|
||||
- Receives: `client_id`, `client_secret`
|
||||
- Saves credentials to `.nextcloud_oauth_client.json`
|
||||
|
||||
3. **Tool Registration**
|
||||
- All MCP tools loaded with their `@require_scopes` decorators
|
||||
- Scope metadata stored for later discovery
|
||||
|
||||
**Result**: MCP server ready to accept client connections
|
||||
|
||||
### Phase 1: Client Discovery (Per MCP Client Connection)
|
||||
|
||||
**Happens**: When MCP client first connects
|
||||
|
||||
**Steps**:
|
||||
1. **MCP Connection**
|
||||
- Client connects to MCP server
|
||||
- Server returns OAuth auth settings (issuer URL, resource URL)
|
||||
|
||||
2. **PRM Discovery** (`GET /.well-known/oauth-protected-resource/mcp`)
|
||||
- Client queries Protected Resource Metadata endpoint (RFC 9728)
|
||||
- Server **dynamically discovers** scopes from all registered tools
|
||||
- Returns: resource URL, `scopes_supported` list, authorization servers
|
||||
- Client now knows which scopes are available
|
||||
|
||||
**Result**: Client knows OAuth configuration and available scopes
|
||||
|
||||
### Phase 2: OAuth Authorization (PKCE Flow - RFC 7636)
|
||||
|
||||
**Happens**: User authorizes access
|
||||
|
||||
**Steps**:
|
||||
1. **PKCE Challenge Generation** (Client-side)
|
||||
- Generate `code_verifier`: random 43-128 character string
|
||||
- Calculate `code_challenge`: `BASE64URL(SHA256(code_verifier))`
|
||||
|
||||
2. **Authorization Request** (`GET /apps/oidc/authorize`)
|
||||
- Client redirects user to Nextcloud consent page
|
||||
- Parameters:
|
||||
- `client_id`: OAuth client ID
|
||||
- `code_challenge`: SHA256 hash of verifier
|
||||
- `code_challenge_method`: `S256`
|
||||
- `scope`: Requested scopes (e.g., `openid notes:read notes:write`)
|
||||
- `redirect_uri`: MCP server callback URL
|
||||
|
||||
3. **User Consent**
|
||||
- User authenticates to Nextcloud (if not already logged in)
|
||||
- User reviews and approves/denies requested scopes
|
||||
- Can select subset of requested scopes
|
||||
|
||||
4. **Authorization Code**
|
||||
- Nextcloud redirects to `callback?code=xyz123`
|
||||
- Code is bound to PKCE challenge
|
||||
|
||||
5. **Token Exchange** (`POST /apps/oidc/token`)
|
||||
- Client sends:
|
||||
- Authorization `code`
|
||||
- `code_verifier` (proves possession of original challenge)
|
||||
- `client_id` and `client_secret`
|
||||
- Nextcloud validates PKCE challenge: `SHA256(code_verifier) == code_challenge`
|
||||
- Nextcloud issues access token
|
||||
|
||||
6. **Access Token Response**
|
||||
- Token type: JWT or opaque (configurable)
|
||||
- Contains user's **granted scopes** (may be subset of requested)
|
||||
- Client stores token for subsequent requests
|
||||
|
||||
**Result**: Client has valid access token with granted scopes
|
||||
|
||||
### Phase 3: MCP Tool Access (Scope-Based Authorization)
|
||||
|
||||
**Happens**: Every MCP tool invocation
|
||||
|
||||
**Steps**:
|
||||
|
||||
#### Tool Listing (`list_tools`)
|
||||
1. **List Tools Request**
|
||||
- Client sends `list_tools` with `Authorization: Bearer <token>`
|
||||
|
||||
2. **Token Validation**
|
||||
- MCP server calls `/apps/oidc/userinfo` with Bearer token
|
||||
- Nextcloud returns user info including **granted scopes**
|
||||
- Result cached for 1 hour
|
||||
|
||||
3. **Dynamic Tool Filtering**
|
||||
- Server compares token scopes with each tool's `@require_scopes`
|
||||
- Only returns tools where user has all required scopes
|
||||
- Example: Token with `notes:read` sees 4 read tools, not 3 write tools
|
||||
|
||||
4. **Filtered Tool List**
|
||||
- Client receives only tools they can use
|
||||
|
||||
#### Tool Execution (e.g., `nc_notes_get_note`)
|
||||
1. **Tool Call**
|
||||
- Client invokes tool with `Authorization: Bearer <token>`
|
||||
|
||||
2. **Scope Validation**
|
||||
- `@require_scopes` decorator extracts token scopes
|
||||
- Verifies token contains required scope (e.g., `notes:read`)
|
||||
- If missing → 403 with `WWW-Authenticate` header (step-up auth)
|
||||
- If present → continues execution
|
||||
|
||||
3. **Nextcloud API Call**
|
||||
- MCP server creates `NextcloudClient` with Bearer token
|
||||
- Calls Nextcloud API (e.g., `GET /apps/notes/api/v1/notes/1`)
|
||||
- `user_oidc` app validates Bearer token again
|
||||
- Request executes as authenticated user
|
||||
|
||||
4. **Response**
|
||||
- Nextcloud returns data
|
||||
- MCP server formats response
|
||||
- Returns to client
|
||||
|
||||
**Result**: User can only access tools and data they have permissions for
|
||||
|
||||
### Phase 4: Insufficient Scope Handling (Step-Up Authorization)
|
||||
|
||||
**Happens**: When user lacks required scopes
|
||||
|
||||
**Steps**:
|
||||
1. **Tool Call with Insufficient Scopes**
|
||||
- User calls `nc_notes_create_note` (requires `notes:write`)
|
||||
- But token only has `notes:read`
|
||||
|
||||
2. **Scope Validation Fails**
|
||||
- `@require_scopes("notes:write")` decorator checks token
|
||||
- Finds `notes:write` missing
|
||||
|
||||
3. **403 Response with Challenge**
|
||||
- Returns `403 Forbidden`
|
||||
- Includes `WWW-Authenticate` header:
|
||||
```
|
||||
Bearer error="insufficient_scope",
|
||||
scope="notes:write",
|
||||
resource_metadata="http://localhost:8000/.well-known/oauth-protected-resource/mcp"
|
||||
```
|
||||
|
||||
4. **Client Re-Authorization** (Optional)
|
||||
- Client can initiate new OAuth flow requesting additional scopes
|
||||
- User re-consents with expanded permissions
|
||||
- New token includes both `notes:read` and `notes:write`
|
||||
|
||||
**Result**: User can dynamically upgrade permissions without full re-authentication
|
||||
|
||||
## Token Validation
|
||||
|
||||
@@ -272,14 +569,145 @@ client = get_client_from_context(ctx)
|
||||
- Protects against authorization code interception
|
||||
|
||||
### Scopes
|
||||
- Required scopes: `openid`, `profile`
|
||||
- Additional scopes inferred from userinfo response
|
||||
- Base required scopes: `openid`, `profile`, `email`
|
||||
- App-specific scopes control access to individual Nextcloud apps
|
||||
- See [OAuth Scopes](#oauth-scopes) section for complete scope reference
|
||||
|
||||
### Token Validation
|
||||
- Every MCP request validates Bearer token
|
||||
- Cached for performance (1-hour default)
|
||||
- Calls userinfo endpoint for validation
|
||||
|
||||
## OAuth Scopes
|
||||
|
||||
The Nextcloud MCP Server implements fine-grained OAuth scopes for each Nextcloud app integration. Scopes control which tools are visible and accessible to users based on their granted permissions.
|
||||
|
||||
### Scope-Based Access Control
|
||||
|
||||
When using OAuth authentication:
|
||||
1. **Dynamic Discovery**: The server automatically discovers all required scopes from `@require_scopes` decorators on MCP tools
|
||||
2. **Tool Filtering**: Tools are dynamically filtered based on the user's token scopes - users only see tools they have permission to use
|
||||
3. **Per-Tool Enforcement**: Each tool validates required scopes before execution, returning a 403 error if insufficient scopes are present
|
||||
|
||||
### Supported Scopes
|
||||
|
||||
The server supports the following OAuth scopes, organized by Nextcloud app:
|
||||
|
||||
#### Base OIDC Scopes
|
||||
- `openid` - OpenID Connect authentication (required)
|
||||
- `profile` - Access to user profile information (required)
|
||||
- `email` - Access to user email address (required)
|
||||
|
||||
#### Notes App
|
||||
- `notes:read` - Read notes, search notes, get note attachments
|
||||
- `notes:write` - Create, update, append to, and delete notes
|
||||
|
||||
#### Calendar App
|
||||
- `calendar:read` - List calendars, read events, search events
|
||||
- `calendar:write` - Create, update, and delete calendars and events
|
||||
|
||||
#### Calendar Tasks (VTODO)
|
||||
- `todo:read` - List and read CalDAV tasks
|
||||
- `todo:write` - Create, update, and delete CalDAV tasks
|
||||
|
||||
#### Contacts App
|
||||
- `contacts:read` - List address books and read contacts (CardDAV)
|
||||
- `contacts:write` - Create, update, and delete address books and contacts
|
||||
|
||||
#### Cookbook App
|
||||
- `cookbook:read` - Read recipes, search recipes
|
||||
- `cookbook:write` - Create, update, and delete recipes
|
||||
|
||||
#### Deck App
|
||||
- `deck:read` - List boards, stacks, cards, and labels
|
||||
- `deck:write` - Create, update, and delete boards, stacks, cards, and labels
|
||||
|
||||
#### Tables App
|
||||
- `tables:read` - List tables and read rows
|
||||
- `tables:write` - Create, update, and delete rows in tables
|
||||
|
||||
#### Files (WebDAV)
|
||||
- `files:read` - List files, read file contents, search files
|
||||
- `files:write` - Upload, update, move, copy, and delete files
|
||||
|
||||
#### Sharing
|
||||
- `sharing:read` - List shares and read share information
|
||||
- `sharing:write` - Create, update, and delete shares
|
||||
|
||||
### Scope Discovery
|
||||
|
||||
The MCP server provides scope discovery through two mechanisms:
|
||||
|
||||
#### 1. Protected Resource Metadata (PRM) Endpoint
|
||||
```bash
|
||||
# Query the PRM endpoint
|
||||
curl http://localhost:8000/.well-known/oauth-protected-resource/mcp
|
||||
|
||||
# Response includes dynamically discovered scopes
|
||||
{
|
||||
"resource": "http://localhost:8000/mcp",
|
||||
"scopes_supported": ["openid", "profile", "email", "notes:read", ...],
|
||||
"authorization_servers": ["https://nextcloud.example.com"],
|
||||
"bearer_methods_supported": ["header"],
|
||||
"resource_signing_alg_values_supported": ["RS256"]
|
||||
}
|
||||
```
|
||||
|
||||
The `scopes_supported` field is **dynamically generated** from all registered MCP tools, ensuring it always reflects the actual available scopes.
|
||||
|
||||
#### 2. Scope Enforcement via Decorators
|
||||
|
||||
Tools are decorated with `@require_scopes()` to declare their required permissions:
|
||||
|
||||
```python
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("notes:read")
|
||||
async def nc_notes_get_note(ctx: Context, note_id: int):
|
||||
"""Get a specific note by ID"""
|
||||
# Implementation
|
||||
```
|
||||
|
||||
### Client Registration Scopes
|
||||
|
||||
During OAuth client registration (dynamic or manual), clients request a set of scopes that define the **maximum allowed** scopes for that client. The actual per-tool enforcement is handled separately via decorators.
|
||||
|
||||
**Environment Variable**:
|
||||
```bash
|
||||
NEXTCLOUD_OIDC_SCOPES="openid profile email notes:read notes:write calendar:read calendar:write ..."
|
||||
```
|
||||
|
||||
**Default**: All supported scopes (recommended for development)
|
||||
|
||||
> **Note**: Client registration scopes define the maximum permissions. The MCP server's PRM endpoint dynamically advertises the actual supported scopes based on registered tools.
|
||||
|
||||
### Step-Up Authorization
|
||||
|
||||
The server supports OAuth step-up authorization (RFC 8693). If a user attempts to use a tool requiring scopes they don't have:
|
||||
|
||||
1. Tool returns `403 Forbidden` with `InsufficientScopeError`
|
||||
2. Response includes `WWW-Authenticate` header listing missing scopes:
|
||||
```
|
||||
WWW-Authenticate: Bearer error="insufficient_scope", scope="notes:write", resource_metadata="..."
|
||||
```
|
||||
3. Client can re-authorize with additional scopes
|
||||
|
||||
### Scope Validation
|
||||
|
||||
All scope enforcement happens at two levels:
|
||||
|
||||
1. **Tool Visibility**: During `list_tools` requests, only tools matching the user's token scopes are returned
|
||||
2. **Execution Time**: When calling a tool, the `@require_scopes` decorator validates the token has necessary scopes
|
||||
|
||||
**Example**:
|
||||
```python
|
||||
# User token has: ["openid", "profile", "email", "notes:read"]
|
||||
# They will see: 4 read-only notes tools
|
||||
# They will NOT see: 3 write notes tools (notes:write required)
|
||||
# Attempting to call a write tool returns 403 Forbidden
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
See [Configuration Guide](configuration.md) for all OAuth environment variables:
|
||||
|
||||
@@ -0,0 +1,441 @@
|
||||
# Ollama Capacity Analysis: ollama.internal.coutinho.io
|
||||
|
||||
**Date**: 2025-10-30
|
||||
**Model**: nomic-embed-text:latest
|
||||
**Test Location**: From nextcloud-mcp-server host
|
||||
|
||||
## Summary
|
||||
|
||||
✅ **Ollama instance is operational and performing well**
|
||||
- Embedding generation working correctly
|
||||
- Reasonable latency for small-medium workloads
|
||||
- Good parallelism support
|
||||
- Suitable for development and small production deployments
|
||||
|
||||
## Test Results
|
||||
|
||||
### Model Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"dimensions": 768,
|
||||
"status": "operational"
|
||||
}
|
||||
```
|
||||
|
||||
### Performance Metrics
|
||||
|
||||
#### 1. Single Embedding Latency
|
||||
|
||||
**Result**: ~553ms per embedding
|
||||
- **Total time**: 0.553 seconds
|
||||
- **Includes**: Network + processing + model inference
|
||||
- **Quality**: Full 768-dimensional vector
|
||||
|
||||
**Analysis**:
|
||||
- Higher than bare-metal benchmarks (~100ms) due to network latency
|
||||
- Acceptable for interactive search queries
|
||||
- Within expected range for remote Ollama instance
|
||||
|
||||
#### 2. Batch Processing (5 items)
|
||||
|
||||
**Result**: ~1.02 seconds for 5 embeddings
|
||||
- **Per-item average**: 204ms
|
||||
- **Throughput**: ~4.9 embeddings/sec
|
||||
- **Batch efficiency**: 2.7x faster than sequential
|
||||
|
||||
**Analysis**:
|
||||
- Good batching efficiency (2.7x speedup vs 5x theoretical)
|
||||
- Optimal for background indexing
|
||||
- Network overhead amortized across batch
|
||||
|
||||
#### 3. Batch Processing (20 items)
|
||||
|
||||
**Result**: ~6.71 seconds for 20 embeddings
|
||||
- **Per-item average**: 336ms
|
||||
- **Throughput**: ~3.0 embeddings/sec
|
||||
- **Batch efficiency**: 1.65x faster than sequential
|
||||
|
||||
**Analysis**:
|
||||
- Performance degrades slightly with larger batches
|
||||
- Still faster than sequential processing
|
||||
- Matches reported Ollama behavior (quality issues at batch >16)
|
||||
- **Recommendation**: Keep batch size ≤16 for best quality
|
||||
|
||||
#### 4. Concurrent Requests (5 parallel)
|
||||
|
||||
**Result**: ~1.27 seconds for 5 parallel requests
|
||||
- **Effective parallelism**: ~4x speedup (vs 2.77s sequential)
|
||||
- **Per-request average**: 254ms
|
||||
- **Throughput**: ~3.9 requests/sec
|
||||
|
||||
**Analysis**:
|
||||
- Excellent parallelism support
|
||||
- Server handles concurrent requests efficiently
|
||||
- Network and compute overlap effectively
|
||||
- Good for multi-user scenarios
|
||||
|
||||
## Capacity Planning
|
||||
|
||||
### Current Performance Profile
|
||||
|
||||
| Metric | Value | Rating |
|
||||
|--------|-------|--------|
|
||||
| Single embedding latency | 553ms | ⚠️ Moderate |
|
||||
| Batch (5) throughput | 4.9/sec | ✅ Good |
|
||||
| Batch (20) throughput | 3.0/sec | ⚠️ Moderate |
|
||||
| Concurrent throughput | 3.9/sec | ✅ Good |
|
||||
| Network latency | ~300-400ms | ⚠️ Significant |
|
||||
|
||||
### Bottleneck Analysis
|
||||
|
||||
**Primary Bottleneck**: Network latency (~300-400ms per request)
|
||||
- Model inference: ~100-200ms (estimated)
|
||||
- Network round-trip: ~300-400ms (measured overhead)
|
||||
- **Impact**: 60-70% of total latency is network
|
||||
|
||||
**Secondary Bottleneck**: CPU/GPU capacity (unknown hardware)
|
||||
- Batch performance degrades at >16 items
|
||||
- Suggests resource constraints
|
||||
- Likely CPU-only (no GPU metrics available)
|
||||
|
||||
### Recommended Usage Patterns
|
||||
|
||||
#### ✅ **Excellent For:**
|
||||
|
||||
**1. Background Indexing**
|
||||
- Use batch size of 10-15 items
|
||||
- Expected throughput: 3-5 embeddings/sec
|
||||
- **10,000 notes**: ~30-55 minutes to index
|
||||
- **1,000 notes**: ~3-5 minutes to index
|
||||
|
||||
**2. Interactive Search**
|
||||
- Single query embedding: ~550ms
|
||||
- Acceptable for user-facing search
|
||||
- Add 100-200ms for vector search + verification
|
||||
- **Total search time**: ~650-750ms (reasonable UX)
|
||||
|
||||
**3. Multi-User Development**
|
||||
- 5-10 concurrent users: Comfortable
|
||||
- Good parallelism support
|
||||
- Network latency dominates (shared)
|
||||
|
||||
#### ⚠️ **Consider Alternatives For:**
|
||||
|
||||
**1. Real-Time Applications**
|
||||
- Sub-100ms latency requirements
|
||||
- High-frequency queries (>10/sec sustained)
|
||||
- Consider: Local embeddings or Infinity
|
||||
|
||||
**2. Large-Scale Batch Processing**
|
||||
- >100,000 documents to index
|
||||
- >10 embeddings/sec sustained
|
||||
- Consider: GPU-accelerated TEI
|
||||
|
||||
**3. Production with >50 Users**
|
||||
- High concurrent load
|
||||
- Latency sensitivity
|
||||
- Consider: Dedicated embedding service
|
||||
|
||||
### Deployment Scenarios
|
||||
|
||||
#### Scenario 1: Development Environment
|
||||
|
||||
**Profile**:
|
||||
- 1-3 developers
|
||||
- 1,000-5,000 notes total
|
||||
- Occasional searches/indexing
|
||||
|
||||
**Verdict**: ✅ **Perfect fit**
|
||||
- Initial index: ~5-15 minutes (one-time)
|
||||
- Incremental updates: <1 minute
|
||||
- Search latency: Acceptable
|
||||
- No infrastructure changes needed
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_INTERVAL=600 # 10 minutes
|
||||
VECTOR_SYNC_BATCH_SIZE=10
|
||||
```
|
||||
|
||||
#### Scenario 2: Small Production (10-20 users)
|
||||
|
||||
**Profile**:
|
||||
- 10-20 active users
|
||||
- 10,000-50,000 notes total
|
||||
- 50-200 searches/day
|
||||
- Nightly incremental indexing
|
||||
|
||||
**Verdict**: ✅ **Suitable with optimizations**
|
||||
- Initial index: 1-3 hours (run overnight)
|
||||
- Incremental: 5-15 minutes/night
|
||||
- Search: Acceptable for most users
|
||||
- Monitor network latency
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_INTERVAL=86400 # Daily at night
|
||||
VECTOR_SYNC_BATCH_SIZE=12 # Conservative for quality
|
||||
SEARCH_TIMEOUT_MS=1000 # Account for 550ms latency
|
||||
```
|
||||
|
||||
**Optimizations**:
|
||||
- Run sync during off-hours
|
||||
- Cache query embeddings (common searches)
|
||||
- Use hybrid search (keyword + semantic)
|
||||
|
||||
#### Scenario 3: Medium Production (50-100 users)
|
||||
|
||||
**Profile**:
|
||||
- 50-100 active users
|
||||
- 100,000+ notes
|
||||
- 500-1000 searches/day
|
||||
- Real-time indexing desired
|
||||
|
||||
**Verdict**: ⚠️ **Marginal - monitor closely**
|
||||
- Initial index: 5-10 hours
|
||||
- Search latency: May feel slow for some users
|
||||
- Concurrent load: Approaching limits
|
||||
- **Recommendation**: Plan migration to Infinity
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_INTERVAL=3600 # Hourly
|
||||
VECTOR_SYNC_BATCH_SIZE=10
|
||||
SEMANTIC_WEIGHT=0.5 # Rely more on keyword search
|
||||
SEARCH_TIMEOUT_MS=2000 # Generous timeout
|
||||
```
|
||||
|
||||
**Migration Path**:
|
||||
- Start with Ollama
|
||||
- Monitor latency metrics
|
||||
- When p95 latency >1s, migrate to Infinity
|
||||
- Keep Ollama as fallback
|
||||
|
||||
#### Scenario 4: Large Production (>100 users)
|
||||
|
||||
**Profile**:
|
||||
- >100 active users
|
||||
- >500,000 notes
|
||||
- >1000 searches/day
|
||||
- Real-time expectations
|
||||
|
||||
**Verdict**: ❌ **Not recommended**
|
||||
- Latency too high for scale
|
||||
- Throughput insufficient
|
||||
- Network becomes bottleneck
|
||||
- **Recommendation**: Use Infinity or TEI from start
|
||||
|
||||
## Network Latency Optimization
|
||||
|
||||
### Current Overhead: ~300-400ms
|
||||
|
||||
**If MCP server runs closer to Ollama**:
|
||||
```
|
||||
Same VPC/network: ~1-5ms (300-400ms savings!)
|
||||
Same host: <1ms (300-400ms savings!)
|
||||
```
|
||||
|
||||
### Recommendation
|
||||
|
||||
**Option A: Co-locate MCP server with Ollama**
|
||||
- Reduces latency from 550ms → 150-200ms
|
||||
- 2.5-3x improvement
|
||||
- Makes Ollama competitive with cloud APIs
|
||||
|
||||
**Option B: Keep separate (current)**
|
||||
- Simpler deployment
|
||||
- Better security isolation
|
||||
- Accept 550ms latency
|
||||
|
||||
**Option C: Add Infinity container to MCP server**
|
||||
- Best of both worlds
|
||||
- Use Infinity for speed (local)
|
||||
- Fallback to Ollama if needed
|
||||
|
||||
## Capacity Estimates
|
||||
|
||||
### Indexing Capacity
|
||||
|
||||
**Sustained Throughput**: 3-4 embeddings/sec (conservative)
|
||||
|
||||
| Document Count | Index Time | Notes |
|
||||
|----------------|------------|-------|
|
||||
| 1,000 | 4-5 min | Quick |
|
||||
| 5,000 | 20-25 min | Reasonable |
|
||||
| 10,000 | 40-50 min | Acceptable |
|
||||
| 50,000 | 3.5-4.5 hours | Overnight job |
|
||||
| 100,000 | 7-9 hours | Long batch |
|
||||
| 500,000 | 35-45 hours | Not recommended |
|
||||
|
||||
**Incremental Updates** (10% change daily):
|
||||
- 1,000 docs: ~30 sec
|
||||
- 10,000 docs: ~5 min
|
||||
- 50,000 docs: ~25 min
|
||||
|
||||
### Search Capacity
|
||||
|
||||
**Query Latency Budget**:
|
||||
- Embedding: 550ms
|
||||
- Vector search: 50-100ms
|
||||
- Permission verification: 50-100ms
|
||||
- **Total**: 650-750ms
|
||||
|
||||
**Concurrent Users** (assuming 1 search every 5 minutes):
|
||||
- 10 users: 2 queries/min → Comfortable
|
||||
- 50 users: 10 queries/min → Near limit
|
||||
- 100 users: 20 queries/min → Over capacity
|
||||
|
||||
**Peak Load** (all users search at once):
|
||||
- Parallelism: ~4 concurrent
|
||||
- Queue time: Proportional to position
|
||||
- 10 simultaneous: ~1.5-2 sec for last user
|
||||
- 50 simultaneous: ~7-10 sec for last user
|
||||
|
||||
## Recommendations
|
||||
|
||||
### Immediate Actions (Development)
|
||||
|
||||
1. **✅ Use Ollama as-is**
|
||||
- Current setup is perfect for dev/testing
|
||||
- No changes needed
|
||||
- Start building semantic search
|
||||
|
||||
2. **Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_BATCH_SIZE=10
|
||||
```
|
||||
|
||||
3. **Add Monitoring**:
|
||||
```python
|
||||
# Track these metrics
|
||||
- embedding_latency_seconds (histogram)
|
||||
- embedding_batch_size (gauge)
|
||||
- embedding_errors_total (counter)
|
||||
```
|
||||
|
||||
### Short-Term (Small Production)
|
||||
|
||||
1. **Optimize Batching**:
|
||||
- Use batch size 10-12 (quality sweet spot)
|
||||
- Process during off-hours
|
||||
- Implement incremental sync
|
||||
|
||||
2. **Add Caching**:
|
||||
```python
|
||||
# Cache common query embeddings
|
||||
@lru_cache(maxsize=1000)
|
||||
async def embed_with_cache(query: str):
|
||||
return await ollama.embed(query)
|
||||
```
|
||||
|
||||
3. **Monitor Metrics**:
|
||||
- P50, P95, P99 latency
|
||||
- Throughput (embeddings/sec)
|
||||
- Error rates
|
||||
|
||||
### Medium-Term (If Scaling Up)
|
||||
|
||||
1. **Add Infinity Container** (when >50 users or latency issues):
|
||||
```yaml
|
||||
services:
|
||||
infinity:
|
||||
image: michaelf34/infinity:latest
|
||||
# Local to MCP server - ~10-20ms latency
|
||||
```
|
||||
|
||||
2. **Implement Tiered Fallback**:
|
||||
```
|
||||
Infinity (local, fast) → Ollama (remote, slower) → Local model
|
||||
```
|
||||
|
||||
3. **Load Testing**:
|
||||
- Simulate 50-100 concurrent users
|
||||
- Measure actual throughput limits
|
||||
- Identify breaking points
|
||||
|
||||
### Long-Term (Enterprise Scale)
|
||||
|
||||
1. **Migrate to TEI Cluster** (when >100 users):
|
||||
- GPU-accelerated
|
||||
- Horizontal scaling
|
||||
- <20ms latency
|
||||
|
||||
2. **Consider Managed Services**:
|
||||
- Pinecone, Qdrant Cloud
|
||||
- Removes operational burden
|
||||
- Better SLAs
|
||||
|
||||
## Testing Recommendations
|
||||
|
||||
### Load Testing Script
|
||||
|
||||
```bash
|
||||
# Test sustained load
|
||||
for i in {1..100}; do
|
||||
curl -s https://ollama.internal.coutinho.io/api/embed \
|
||||
-d "{\"model\": \"nomic-embed-text\", \"input\": \"Test $i\"}" &
|
||||
|
||||
# Rate limit: 5 concurrent
|
||||
if [ $(($i % 5)) -eq 0 ]; then
|
||||
wait
|
||||
sleep 1
|
||||
fi
|
||||
done
|
||||
```
|
||||
|
||||
### Metrics to Collect
|
||||
|
||||
1. **Latency Distribution**:
|
||||
- P50 (median)
|
||||
- P95 (acceptable)
|
||||
- P99 (outliers)
|
||||
|
||||
2. **Throughput**:
|
||||
- Embeddings/second
|
||||
- Peak vs sustained
|
||||
|
||||
3. **Error Rates**:
|
||||
- Timeouts
|
||||
- Server errors
|
||||
- Quality issues
|
||||
|
||||
## Conclusion
|
||||
|
||||
**Your Ollama instance is ready for development and small production use!**
|
||||
|
||||
**Current Capacity**:
|
||||
- ✅ Development: Unlimited
|
||||
- ✅ Small prod (10-20 users, 10k docs): Comfortable
|
||||
- ⚠️ Medium prod (50 users, 50k docs): Monitoring needed
|
||||
- ❌ Large prod (>100 users): Migrate to Infinity/TEI
|
||||
|
||||
**Key Strengths**:
|
||||
- Fully operational
|
||||
- Good parallelism
|
||||
- Acceptable latency for most use cases
|
||||
- Easy to integrate
|
||||
|
||||
**Key Limitations**:
|
||||
- Network latency adds 300-400ms overhead
|
||||
- Batch quality issues at >16 items
|
||||
- Limited scalability beyond 50 users
|
||||
|
||||
**Recommendation**:
|
||||
Start using Ollama immediately for development. Add monitoring and plan for Infinity when you approach 50 users or experience latency issues. The abstraction layer in ADR-003 makes migration seamless.
|
||||
|
||||
**Next Steps**:
|
||||
1. Configure MCP server with Ollama URL
|
||||
2. Implement semantic search tools
|
||||
3. Add basic monitoring
|
||||
4. Test with real workload
|
||||
5. Scale up as needed
|
||||
@@ -0,0 +1,796 @@
|
||||
# Ollama Embeddings Investigation
|
||||
|
||||
**Date**: 2025-10-30
|
||||
**Status**: Recommendation for Integration
|
||||
|
||||
## Executive Summary
|
||||
|
||||
Ollama provides a **local, self-hosted embedding solution** that is excellent for **development and small-scale deployments** but has **performance limitations** compared to specialized embedding inference engines (TEI, Infinity).
|
||||
|
||||
**Recommendation**: Include Ollama as **Tier 2 fallback** in our embedding strategy (after cloud APIs, before local sentence-transformers), prioritizing ease of setup over maximum performance.
|
||||
|
||||
## Overview
|
||||
|
||||
Ollama is primarily known as a local LLM runner but added embedding model support in version 0.1.26, making it a convenient option for generating vector embeddings without external API dependencies.
|
||||
|
||||
### Key Characteristics
|
||||
|
||||
- **Local & Self-Hosted**: No external API calls, full privacy
|
||||
- **Easy Setup**: Single binary, simple model downloads (`ollama pull nomic-embed-text`)
|
||||
- **Unified Platform**: Same tool for both LLMs and embeddings
|
||||
- **OpenAI Compatible**: `/v1/embeddings` endpoint for drop-in replacement
|
||||
- **Multi-Platform**: Linux, macOS, Windows support
|
||||
- **GPU Support**: CUDA, ROCm, Metal acceleration
|
||||
|
||||
## API Details
|
||||
|
||||
### Endpoint Structure
|
||||
|
||||
**New API** (recommended):
|
||||
```bash
|
||||
POST http://localhost:11434/api/embed
|
||||
```
|
||||
|
||||
**OpenAI Compatible**:
|
||||
```bash
|
||||
POST http://localhost:11434/v1/embeddings
|
||||
```
|
||||
|
||||
**Legacy API** (deprecated):
|
||||
```bash
|
||||
POST http://localhost:11434/api/embeddings
|
||||
```
|
||||
|
||||
### Request Format
|
||||
|
||||
**Single Text Embedding**:
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"input": "Text to embed"
|
||||
}
|
||||
```
|
||||
|
||||
**Batch Embedding** (since v0.2.0):
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"input": [
|
||||
"First text to embed",
|
||||
"Second text to embed",
|
||||
"Third text to embed"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Response Format
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"embeddings": [
|
||||
[0.123, -0.456, 0.789, ...], // 768 dimensions for nomic-embed-text
|
||||
[0.234, -0.567, 0.890, ...]
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Python Integration
|
||||
|
||||
```python
|
||||
import ollama
|
||||
|
||||
# Single embedding
|
||||
response = ollama.embed(
|
||||
model='nomic-embed-text',
|
||||
input='Text to embed'
|
||||
)
|
||||
embedding = response['embeddings'][0]
|
||||
|
||||
# Batch embeddings (more efficient)
|
||||
response = ollama.embed(
|
||||
model='nomic-embed-text',
|
||||
input=[
|
||||
'First text',
|
||||
'Second text',
|
||||
'Third text'
|
||||
]
|
||||
)
|
||||
embeddings = response['embeddings']
|
||||
```
|
||||
|
||||
## Available Models
|
||||
|
||||
### 1. nomic-embed-text (Recommended)
|
||||
|
||||
**Specifications**:
|
||||
- **Parameters**: 137M
|
||||
- **Dimensions**: 768
|
||||
- **Context Length**: 8,192 tokens (2K effective)
|
||||
- **Size**: 274MB
|
||||
- **Architecture**: BERT-based
|
||||
|
||||
**Performance**:
|
||||
- Outperforms OpenAI `text-embedding-ada-002` and `text-embedding-3-small`
|
||||
- Excellent for long-context tasks
|
||||
- Strong general-purpose performance
|
||||
|
||||
**Use Cases**:
|
||||
- General RAG applications
|
||||
- Long document processing
|
||||
- Semantic search
|
||||
- Document clustering
|
||||
|
||||
**Pull Command**:
|
||||
```bash
|
||||
ollama pull nomic-embed-text
|
||||
```
|
||||
|
||||
### 2. mxbai-embed-large
|
||||
|
||||
**Specifications**:
|
||||
- **Parameters**: 334M
|
||||
- **Dimensions**: 1,024
|
||||
- **Context Length**: 512 tokens
|
||||
- **Architecture**: BERT-large optimized
|
||||
|
||||
**Performance**:
|
||||
- Claims to outperform commercial models
|
||||
- Higher precision for complex queries
|
||||
- Best quality but slower
|
||||
|
||||
**Use Cases**:
|
||||
- High-precision semantic search
|
||||
- Enterprise knowledge bases
|
||||
- Multilingual content
|
||||
|
||||
**Pull Command**:
|
||||
```bash
|
||||
ollama pull mxbai-embed-large
|
||||
```
|
||||
|
||||
### 3. all-minilm
|
||||
|
||||
**Specifications**:
|
||||
- **Parameters**: 23M
|
||||
- **Dimensions**: 384
|
||||
- **Context Length**: 256 tokens
|
||||
- **Size**: Smallest footprint
|
||||
|
||||
**Performance**:
|
||||
- Fastest processing speed
|
||||
- Good for sentence-level tasks
|
||||
- Limited context window
|
||||
|
||||
**Use Cases**:
|
||||
- Real-time applications
|
||||
- Resource-constrained environments
|
||||
- High-throughput scenarios
|
||||
- Development/testing
|
||||
|
||||
**Pull Command**:
|
||||
```bash
|
||||
ollama pull all-minilm
|
||||
```
|
||||
|
||||
## Performance Benchmarks
|
||||
|
||||
### Throughput Comparison
|
||||
|
||||
| Hardware | Model | Batch Size | Throughput | Notes |
|
||||
|----------|-------|------------|------------|-------|
|
||||
| RTX 4090 (24GB) | nomic-embed-text | 256 | 12,450 tok/sec | GPU-accelerated |
|
||||
| RTX 4090 (24GB) | mxbai-embed-large | 128 | 8,920 tok/sec | GPU-accelerated |
|
||||
| Intel i9-13900K (CPU) | nomic-embed-text | 32 | 3,250 tok/sec | CPU-only |
|
||||
| Intel i9-13900K (CPU) | mxbai-embed-large | 16 | 2,180 tok/sec | CPU-only |
|
||||
|
||||
### Latency Comparison
|
||||
|
||||
**Single Request Latency** (RTX 4060):
|
||||
- Ollama: ~99ms
|
||||
- TEI: ~20ms (5x faster)
|
||||
- Infinity: ~30-40ms (2.5-3x faster)
|
||||
|
||||
**Batch Processing**:
|
||||
- Optimal batch size: 32-64 (model dependent)
|
||||
- Performance degrades with batches >16 (quality issues reported)
|
||||
- 2x slower than direct sentence-transformers usage
|
||||
|
||||
### Engine Comparison
|
||||
|
||||
Based on benchmarks from Baseten (2024):
|
||||
|
||||
| Engine | Relative Throughput | Notes |
|
||||
|--------|---------------------|-------|
|
||||
| BEI | 9.0x (baseline) | Fastest (proprietary) |
|
||||
| TEI | 4.5x | Open source, Rust-based |
|
||||
| Infinity | 3.5x | PyTorch/ONNX optimized |
|
||||
| vLLM | 3.0x | General LLM inference |
|
||||
| **Ollama** | **1.0x** | Slowest for embeddings |
|
||||
|
||||
**Key Insight**: Ollama is **5-9x slower** than specialized embedding engines but trades performance for ease of use and unified platform.
|
||||
|
||||
## Integration Implementation
|
||||
|
||||
### Python Client Wrapper
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/embeddings/ollama.py
|
||||
import httpx
|
||||
from typing import List
|
||||
|
||||
|
||||
class OllamaEmbedding:
|
||||
"""Ollama embedding provider"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
base_url: str = "http://localhost:11434",
|
||||
model: str = "nomic-embed-text"
|
||||
):
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.model = model
|
||||
self.client = httpx.AsyncClient(timeout=60.0)
|
||||
|
||||
# Model dimension mapping
|
||||
self.dimensions = {
|
||||
"nomic-embed-text": 768,
|
||||
"mxbai-embed-large": 1024,
|
||||
"all-minilm": 384
|
||||
}
|
||||
self.dimension = self.dimensions.get(model, 768)
|
||||
|
||||
async def embed(self, text: str) -> List[float]:
|
||||
"""Generate embedding for single text"""
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/embed",
|
||||
json={
|
||||
"model": self.model,
|
||||
"input": text
|
||||
}
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data["embeddings"][0]
|
||||
|
||||
async def embed_batch(
|
||||
self,
|
||||
texts: List[str],
|
||||
batch_size: int = 32
|
||||
) -> List[List[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts in batches.
|
||||
|
||||
Note: Ollama has reported quality issues with batch sizes >16.
|
||||
We use batch_size=32 as default but allow configuration.
|
||||
"""
|
||||
all_embeddings = []
|
||||
|
||||
# Process in chunks to avoid batch size issues
|
||||
for i in range(0, len(texts), batch_size):
|
||||
batch = texts[i:i + batch_size]
|
||||
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/embed",
|
||||
json={
|
||||
"model": self.model,
|
||||
"input": batch
|
||||
}
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
all_embeddings.extend(data["embeddings"])
|
||||
|
||||
return all_embeddings
|
||||
|
||||
async def check_health(self) -> bool:
|
||||
"""Check if Ollama server is running and model is available"""
|
||||
try:
|
||||
# Check if server is up
|
||||
response = await self.client.get(f"{self.base_url}/api/tags")
|
||||
response.raise_for_status()
|
||||
|
||||
# Check if model is pulled
|
||||
models = response.json().get("models", [])
|
||||
model_names = [m["name"] for m in models]
|
||||
|
||||
if self.model not in model_names:
|
||||
raise ValueError(
|
||||
f"Model '{self.model}' not found. "
|
||||
f"Run: ollama pull {self.model}"
|
||||
)
|
||||
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
raise ConnectionError(f"Ollama health check failed: {e}")
|
||||
|
||||
async def close(self):
|
||||
"""Close HTTP client"""
|
||||
await self.client.aclose()
|
||||
```
|
||||
|
||||
### Auto-Detection in Embedding Service
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/embeddings/service.py
|
||||
from typing import Optional
|
||||
import os
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EmbeddingService:
|
||||
"""Unified embedding service with automatic provider detection"""
|
||||
|
||||
def __init__(self):
|
||||
self.provider = None
|
||||
self._detect_provider()
|
||||
|
||||
def _detect_provider(self):
|
||||
"""Auto-detect available embedding provider"""
|
||||
|
||||
# Tier 1: OpenAI API (best quality)
|
||||
if os.getenv("OPENAI_API_KEY"):
|
||||
from .openai import OpenAIEmbedding
|
||||
self.provider = OpenAIEmbedding(
|
||||
model=os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"),
|
||||
api_key=os.getenv("OPENAI_API_KEY")
|
||||
)
|
||||
logger.info("✓ Using OpenAI embeddings")
|
||||
return
|
||||
|
||||
# Tier 2a: Infinity (optimized self-hosted)
|
||||
if os.getenv("INFINITY_URL"):
|
||||
from .infinity import InfinityEmbedding
|
||||
try:
|
||||
self.provider = InfinityEmbedding(
|
||||
url=os.getenv("INFINITY_URL"),
|
||||
model=os.getenv("EMBEDDING_MODEL", "BAAI/bge-small-en-v1.5")
|
||||
)
|
||||
logger.info("✓ Using Infinity embeddings (optimized)")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Infinity unavailable: {e}")
|
||||
|
||||
# Tier 2b: Ollama (easy self-hosted)
|
||||
if os.getenv("OLLAMA_URL"):
|
||||
from .ollama import OllamaEmbedding
|
||||
try:
|
||||
self.provider = OllamaEmbedding(
|
||||
base_url=os.getenv("OLLAMA_URL", "http://localhost:11434"),
|
||||
model=os.getenv("OLLAMA_MODEL", "nomic-embed-text")
|
||||
)
|
||||
# Verify Ollama is running and model is available
|
||||
import asyncio
|
||||
asyncio.run(self.provider.check_health())
|
||||
logger.info("✓ Using Ollama embeddings (easy setup)")
|
||||
return
|
||||
except Exception as e:
|
||||
logger.warning(f"Ollama unavailable: {e}")
|
||||
|
||||
# Tier 3: Local model (fallback)
|
||||
logger.warning("No cloud/hosted embeddings available, using local model")
|
||||
from .local import LocalEmbedding
|
||||
self.provider = LocalEmbedding(
|
||||
model=os.getenv("LOCAL_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
|
||||
)
|
||||
logger.info("✓ Using local embeddings (CPU fallback)")
|
||||
|
||||
async def embed(self, text: str):
|
||||
"""Generate embedding for text"""
|
||||
return await self.provider.embed(text)
|
||||
|
||||
async def embed_batch(self, texts: list[str]):
|
||||
"""Generate embeddings for multiple texts"""
|
||||
return await self.provider.embed_batch(texts)
|
||||
|
||||
@property
|
||||
def dimension(self) -> int:
|
||||
"""Get embedding dimension"""
|
||||
return self.provider.dimension
|
||||
```
|
||||
|
||||
### Docker Compose Configuration
|
||||
|
||||
```yaml
|
||||
services:
|
||||
# Ollama embedding service
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
restart: always
|
||||
ports:
|
||||
- 127.0.0.1:11434:11434
|
||||
volumes:
|
||||
- ollama_models:/root/.ollama
|
||||
# Optional: GPU support
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
# Pull models on startup
|
||||
entrypoint: ["/bin/sh", "-c"]
|
||||
command:
|
||||
- |
|
||||
ollama serve &
|
||||
sleep 5
|
||||
ollama pull nomic-embed-text
|
||||
wait
|
||||
|
||||
# MCP Server with Ollama embeddings
|
||||
mcp:
|
||||
build: .
|
||||
depends_on:
|
||||
- ollama
|
||||
environment:
|
||||
# ... other vars ...
|
||||
- OLLAMA_URL=http://ollama:11434
|
||||
- OLLAMA_MODEL=nomic-embed-text
|
||||
|
||||
# Vector sync worker
|
||||
mcp-vector-sync:
|
||||
build: .
|
||||
command: ["python", "-m", "nextcloud_mcp_server.sync.vector_indexer"]
|
||||
depends_on:
|
||||
- ollama
|
||||
- qdrant
|
||||
environment:
|
||||
# ... other vars ...
|
||||
- OLLAMA_URL=http://ollama:11434
|
||||
- OLLAMA_MODEL=nomic-embed-text
|
||||
|
||||
volumes:
|
||||
ollama_models:
|
||||
```
|
||||
|
||||
## Advantages of Ollama
|
||||
|
||||
### 1. **Ease of Setup**
|
||||
|
||||
```bash
|
||||
# Install Ollama
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
|
||||
# Pull embedding model
|
||||
ollama pull nomic-embed-text
|
||||
|
||||
# Done! API available at localhost:11434
|
||||
```
|
||||
|
||||
No complex configuration, no Docker registries, no model conversion.
|
||||
|
||||
### 2. **Privacy & Data Sovereignty**
|
||||
|
||||
- All processing happens locally
|
||||
- No data leaves your infrastructure
|
||||
- No API keys or external dependencies
|
||||
- Ideal for sensitive content (medical, legal, financial)
|
||||
|
||||
### 3. **Unified Platform**
|
||||
|
||||
- Same tool for LLMs and embeddings
|
||||
- Consistent API across model types
|
||||
- Single point of management
|
||||
- Simplified operations
|
||||
|
||||
### 4. **Developer Experience**
|
||||
|
||||
- Simple API (similar to OpenAI)
|
||||
- Good documentation
|
||||
- Active community
|
||||
- Framework integrations (LangChain, LlamaIndex)
|
||||
|
||||
### 5. **Cost**
|
||||
|
||||
- Free and open source
|
||||
- No per-token API costs
|
||||
- Only infrastructure costs (compute)
|
||||
|
||||
### 6. **Model Variety**
|
||||
|
||||
Growing library of embedding models:
|
||||
- nomic-embed-text (general purpose)
|
||||
- mxbai-embed-large (high quality)
|
||||
- all-minilm (fast)
|
||||
- More models added regularly
|
||||
|
||||
## Limitations of Ollama
|
||||
|
||||
### 1. **Performance**
|
||||
|
||||
- **5-9x slower** than specialized engines (TEI, Infinity)
|
||||
- Not optimized specifically for embedding inference
|
||||
- Batch processing issues at larger batch sizes (>16)
|
||||
- Higher latency compared to alternatives
|
||||
|
||||
### 2. **Scalability**
|
||||
|
||||
- Single-instance deployment (no native clustering)
|
||||
- Limited concurrent request handling
|
||||
- Not designed for high-throughput production
|
||||
- Resource usage per request is higher
|
||||
|
||||
### 3. **Batch Processing Issues**
|
||||
|
||||
- Quality degradation reported with large batches
|
||||
- Optimal batch size: 32-64 (conservative)
|
||||
- Less efficient than specialized engines
|
||||
- GitHub issues tracking batch problems (#6262)
|
||||
|
||||
### 4. **Resource Usage**
|
||||
|
||||
- Models stay loaded in memory (VRAM/RAM)
|
||||
- Higher memory footprint per model
|
||||
- GPU context switching overhead
|
||||
- Not as memory-efficient as specialized engines
|
||||
|
||||
### 5. **Production Features**
|
||||
|
||||
- No built-in load balancing
|
||||
- Limited monitoring/metrics
|
||||
- No automatic scaling
|
||||
- Basic error handling
|
||||
|
||||
## Use Case Recommendations
|
||||
|
||||
### ✅ **Excellent For:**
|
||||
|
||||
1. **Development & Testing**
|
||||
- Quick setup for prototyping
|
||||
- Local development environments
|
||||
- Testing embedding pipelines
|
||||
|
||||
2. **Small Deployments**
|
||||
- <10 users
|
||||
- <10,000 documents
|
||||
- Infrequent searches (<100/day)
|
||||
- Hobbyist/personal projects
|
||||
|
||||
3. **Privacy-Critical Applications**
|
||||
- Medical/healthcare records
|
||||
- Legal documents
|
||||
- Financial data
|
||||
- Air-gapped environments
|
||||
|
||||
4. **Unified LLM Stack**
|
||||
- Projects already using Ollama for LLMs
|
||||
- Simplified operations
|
||||
- Consistent tooling
|
||||
|
||||
5. **Educational/Learning**
|
||||
- Teaching RAG concepts
|
||||
- Learning embeddings
|
||||
- Hackathons/workshops
|
||||
|
||||
### ⚠️ **Consider Alternatives For:**
|
||||
|
||||
1. **Production at Scale**
|
||||
- >100 users
|
||||
- >100,000 documents
|
||||
- High query volume (>1000/day)
|
||||
- Use: TEI or Infinity
|
||||
|
||||
2. **Performance-Critical**
|
||||
- Real-time search (<50ms latency)
|
||||
- High-throughput batch processing
|
||||
- Use: TEI with GPU
|
||||
|
||||
3. **Enterprise Deployments**
|
||||
- Need for high availability
|
||||
- Load balancing requirements
|
||||
- Advanced monitoring
|
||||
- Use: Managed services or TEI cluster
|
||||
|
||||
4. **Large-Scale Indexing**
|
||||
- Millions of documents
|
||||
- Continuous high-volume ingestion
|
||||
- Use: Infinity or commercial solutions
|
||||
|
||||
## Integration Strategy
|
||||
|
||||
### Recommended Tier Placement
|
||||
|
||||
**Update ADR-003 embedding strategy:**
|
||||
|
||||
```
|
||||
Tier 1: OpenAI API (best quality, requires API key)
|
||||
↓ fallback
|
||||
Tier 2a: Infinity (optimized self-hosted, complex setup)
|
||||
↓ fallback
|
||||
Tier 2b: Ollama (easy self-hosted, moderate performance) ← NEW
|
||||
↓ fallback
|
||||
Tier 3: Local sentence-transformers (CPU fallback, simplest)
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
```bash
|
||||
# Option 1: Use Infinity (if available)
|
||||
INFINITY_URL=http://infinity:7997
|
||||
EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
|
||||
|
||||
# Option 2: Use Ollama (if Infinity unavailable)
|
||||
OLLAMA_URL=http://ollama:11434
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
|
||||
# Option 3: Use local model (automatic fallback)
|
||||
# No configuration needed
|
||||
```
|
||||
|
||||
### When to Choose Ollama
|
||||
|
||||
**Choose Ollama if**:
|
||||
- You're already using Ollama for LLMs
|
||||
- You need privacy/data sovereignty
|
||||
- You have <10k documents and <100 users
|
||||
- Ease of setup is more important than max performance
|
||||
- You're in development/testing phase
|
||||
|
||||
**Choose Infinity/TEI if**:
|
||||
- You need maximum throughput (>1000 embeddings/sec)
|
||||
- You have >100k documents
|
||||
- Latency is critical (<50ms)
|
||||
- You're in production with >100 users
|
||||
|
||||
**Choose OpenAI API if**:
|
||||
- You're okay with cloud dependencies
|
||||
- You need best-in-class quality
|
||||
- Cost is not a concern (~$0.02 per 1M tokens)
|
||||
|
||||
## Production Deployment Guidance
|
||||
|
||||
### Small Production (Ollama Acceptable)
|
||||
|
||||
**Profile**:
|
||||
- 5-20 users
|
||||
- 1,000-10,000 documents
|
||||
- 50-200 searches/day
|
||||
- <2 sec acceptable latency
|
||||
|
||||
**Configuration**:
|
||||
```yaml
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 4GB
|
||||
cpus: "2.0"
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia # GPU if available
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
environment:
|
||||
- OLLAMA_NUM_PARALLEL=2 # Concurrent requests
|
||||
```
|
||||
|
||||
**Expected Performance**:
|
||||
- Embedding latency: 100-200ms
|
||||
- Throughput: 5-10 embeddings/sec
|
||||
- Memory: 2-3GB (model loaded)
|
||||
|
||||
### Medium Production (Use Infinity/TEI)
|
||||
|
||||
**Profile**:
|
||||
- 20-200 users
|
||||
- 10,000-1M documents
|
||||
- 500-5,000 searches/day
|
||||
- <500ms acceptable latency
|
||||
|
||||
**Recommendation**: Migrate to Infinity or TEI
|
||||
```yaml
|
||||
infinity:
|
||||
image: michaelf34/infinity:latest
|
||||
# Better throughput and latency
|
||||
```
|
||||
|
||||
### Large Production (Use Specialized Solution)
|
||||
|
||||
**Profile**:
|
||||
- >200 users
|
||||
- >1M documents
|
||||
- >5,000 searches/day
|
||||
- <100ms required latency
|
||||
|
||||
**Recommendation**: Use TEI cluster or commercial service
|
||||
|
||||
## Monitoring Considerations
|
||||
|
||||
### Key Metrics to Track
|
||||
|
||||
```python
|
||||
# Add Ollama-specific metrics
|
||||
from prometheus_client import Histogram, Counter, Gauge
|
||||
|
||||
ollama_embedding_latency = Histogram(
|
||||
'ollama_embedding_duration_seconds',
|
||||
'Ollama embedding generation time',
|
||||
['model', 'batch_size']
|
||||
)
|
||||
|
||||
ollama_batch_size = Gauge(
|
||||
'ollama_batch_size',
|
||||
'Current batch size being processed'
|
||||
)
|
||||
|
||||
ollama_errors = Counter(
|
||||
'ollama_errors_total',
|
||||
'Ollama embedding errors',
|
||||
['error_type']
|
||||
)
|
||||
```
|
||||
|
||||
### Health Checks
|
||||
|
||||
```python
|
||||
async def ollama_health_check():
|
||||
"""Check Ollama availability"""
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
# Check server
|
||||
response = await client.get("http://ollama:11434/api/tags")
|
||||
response.raise_for_status()
|
||||
|
||||
# Verify model loaded
|
||||
models = response.json().get("models", [])
|
||||
if "nomic-embed-text" not in [m["name"] for m in models]:
|
||||
return False, "Model not pulled"
|
||||
|
||||
return True, "OK"
|
||||
except Exception as e:
|
||||
return False, str(e)
|
||||
```
|
||||
|
||||
## Migration Path
|
||||
|
||||
### Starting with Ollama
|
||||
|
||||
**Phase 1: Development** (Ollama)
|
||||
- Use Ollama for initial development
|
||||
- Validate embedding pipeline
|
||||
- Test search quality
|
||||
|
||||
**Phase 2: Growth** (Ollama → Infinity)
|
||||
- Monitor performance metrics
|
||||
- When >50 users or >10k docs, migrate to Infinity
|
||||
- Simple config change, no code changes
|
||||
|
||||
**Phase 3: Scale** (Infinity → TEI/Commercial)
|
||||
- When >200 users or performance issues
|
||||
- Consider TEI cluster or managed services
|
||||
|
||||
### Code Compatibility
|
||||
|
||||
All embedding providers use the same interface:
|
||||
```python
|
||||
# Works with Ollama, Infinity, OpenAI, Local
|
||||
embedding = await embedding_service.embed(text)
|
||||
embeddings = await embedding_service.embed_batch(texts)
|
||||
```
|
||||
|
||||
**Migration is a configuration change only** - no code rewrite needed.
|
||||
|
||||
## Conclusion
|
||||
|
||||
**Ollama is a solid choice for:**
|
||||
- Early-stage projects
|
||||
- Development/testing
|
||||
- Privacy-critical applications
|
||||
- Small deployments (<10 users, <10k docs)
|
||||
- Unified LLM + embedding stack
|
||||
|
||||
**But recognize its limitations:**
|
||||
- 5-9x slower than specialized engines
|
||||
- Not designed for high-throughput production
|
||||
- Batch processing can be problematic
|
||||
- Limited scalability
|
||||
|
||||
**Recommendation**:
|
||||
✅ **Include Ollama as Tier 2b** (after Infinity, before local models) in the embedding strategy. It provides a good balance of ease-of-use and privacy for small-to-medium deployments while allowing seamless migration to more performant engines as needs grow.
|
||||
|
||||
The key is designing the abstraction layer (as done in ADR-003) so migration between engines requires only configuration changes, not code rewrites.
|
||||
+74
@@ -21,3 +21,77 @@ NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
|
||||
# - If these are set, OAuth mode is disabled
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# ============================================
|
||||
# Document Processing Configuration
|
||||
# ============================================
|
||||
# Enable document processing (PDF, DOCX, images, etc.)
|
||||
# Set to false to disable all document processing
|
||||
ENABLE_DOCUMENT_PROCESSING=false
|
||||
|
||||
# Default processor to use when multiple are available
|
||||
# Options: unstructured, tesseract, custom
|
||||
DOCUMENT_PROCESSOR=unstructured
|
||||
|
||||
# ============================================
|
||||
# Unstructured.io Processor
|
||||
# ============================================
|
||||
# Enable Unstructured processor (requires unstructured service in docker-compose)
|
||||
# This is a cloud-based/API processor supporting many document types
|
||||
ENABLE_UNSTRUCTURED=false
|
||||
|
||||
# Unstructured API endpoint
|
||||
UNSTRUCTURED_API_URL=http://unstructured:8000
|
||||
|
||||
# Request timeout in seconds (default: 120)
|
||||
# OCR operations can take 30-120 seconds for large documents
|
||||
UNSTRUCTURED_TIMEOUT=120
|
||||
|
||||
# Parsing strategy: auto, fast, hi_res
|
||||
# - auto: Automatically choose based on document type
|
||||
# - fast: Fast parsing without OCR
|
||||
# - hi_res: High-resolution with OCR (slowest, most accurate)
|
||||
UNSTRUCTURED_STRATEGY=auto
|
||||
|
||||
# OCR languages (comma-separated ISO 639-3 codes)
|
||||
# Common: eng=English, deu=German, fra=French, spa=Spanish
|
||||
UNSTRUCTURED_LANGUAGES=eng,deu
|
||||
|
||||
# Progress reporting interval in seconds (default: 10)
|
||||
# During long-running OCR operations, progress notifications are sent to the MCP client
|
||||
# at this interval to prevent timeouts and provide status updates
|
||||
PROGRESS_INTERVAL=10
|
||||
|
||||
# ============================================
|
||||
# Tesseract Processor (Local OCR)
|
||||
# ============================================
|
||||
# Enable Tesseract processor (requires tesseract binary installed)
|
||||
# This is a local, lightweight OCR solution for images only
|
||||
ENABLE_TESSERACT=false
|
||||
|
||||
# Path to tesseract executable (optional, auto-detected if in PATH)
|
||||
#TESSERACT_CMD=/usr/bin/tesseract
|
||||
|
||||
# OCR language (e.g., eng, deu, eng+deu for multiple)
|
||||
TESSERACT_LANG=eng
|
||||
|
||||
# ============================================
|
||||
# Custom Processor (Your own API)
|
||||
# ============================================
|
||||
# Enable custom document processor via HTTP API
|
||||
ENABLE_CUSTOM_PROCESSOR=false
|
||||
|
||||
# Unique name for your processor
|
||||
#CUSTOM_PROCESSOR_NAME=my_ocr
|
||||
|
||||
# Your custom processor API endpoint
|
||||
#CUSTOM_PROCESSOR_URL=http://localhost:9000/process
|
||||
|
||||
# Optional API key for authentication
|
||||
#CUSTOM_PROCESSOR_API_KEY=your-api-key-here
|
||||
|
||||
# Request timeout in seconds
|
||||
#CUSTOM_PROCESSOR_TIMEOUT=60
|
||||
|
||||
# Comma-separated MIME types your processor supports
|
||||
#CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg,image/png
|
||||
|
||||
+183
-26
@@ -18,13 +18,19 @@ from starlette.routing import Mount, Route
|
||||
from nextcloud_mcp_server.auth import (
|
||||
InsufficientScopeError,
|
||||
NextcloudTokenVerifier,
|
||||
discover_all_scopes,
|
||||
get_access_token_scopes,
|
||||
has_required_scopes,
|
||||
is_jwt_token,
|
||||
)
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.config import LOGGING_CONFIG, setup_logging
|
||||
from nextcloud_mcp_server.config import (
|
||||
LOGGING_CONFIG,
|
||||
get_document_processor_config,
|
||||
setup_logging,
|
||||
)
|
||||
from nextcloud_mcp_server.context import get_client as get_nextcloud_client
|
||||
from nextcloud_mcp_server.document_processors import get_registry
|
||||
from nextcloud_mcp_server.server import (
|
||||
configure_calendar_tools,
|
||||
configure_contacts_tools,
|
||||
@@ -39,6 +45,92 @@ from nextcloud_mcp_server.server import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def initialize_document_processors():
|
||||
"""Initialize and register document processors based on configuration.
|
||||
|
||||
This function reads the environment configuration and registers available
|
||||
processors (Unstructured, Tesseract, Custom HTTP) with the global registry.
|
||||
"""
|
||||
config = get_document_processor_config()
|
||||
|
||||
if not config["enabled"]:
|
||||
logger.info("Document processing disabled")
|
||||
return
|
||||
|
||||
registry = get_registry()
|
||||
registered_count = 0
|
||||
|
||||
# Register Unstructured processor
|
||||
if "unstructured" in config["processors"]:
|
||||
unst_config = config["processors"]["unstructured"]
|
||||
try:
|
||||
from nextcloud_mcp_server.document_processors.unstructured import (
|
||||
UnstructuredProcessor,
|
||||
)
|
||||
|
||||
processor = UnstructuredProcessor(
|
||||
api_url=unst_config["api_url"],
|
||||
timeout=unst_config["timeout"],
|
||||
default_strategy=unst_config["strategy"],
|
||||
default_languages=unst_config["languages"],
|
||||
progress_interval=unst_config.get("progress_interval", 10),
|
||||
)
|
||||
registry.register(processor, priority=10)
|
||||
logger.info(f"Registered Unstructured processor: {unst_config['api_url']}")
|
||||
registered_count += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to register Unstructured processor: {e}")
|
||||
|
||||
# Register Tesseract processor
|
||||
if "tesseract" in config["processors"]:
|
||||
tess_config = config["processors"]["tesseract"]
|
||||
try:
|
||||
from nextcloud_mcp_server.document_processors.tesseract import (
|
||||
TesseractProcessor,
|
||||
)
|
||||
|
||||
processor = TesseractProcessor(
|
||||
tesseract_cmd=tess_config.get("tesseract_cmd"),
|
||||
default_lang=tess_config["lang"],
|
||||
)
|
||||
registry.register(processor, priority=5)
|
||||
logger.info(f"Registered Tesseract processor: lang={tess_config['lang']}")
|
||||
registered_count += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to register Tesseract processor: {e}")
|
||||
|
||||
# Register custom processor
|
||||
if "custom" in config["processors"]:
|
||||
custom_config = config["processors"]["custom"]
|
||||
try:
|
||||
from nextcloud_mcp_server.document_processors.custom_http import (
|
||||
CustomHTTPProcessor,
|
||||
)
|
||||
|
||||
processor = CustomHTTPProcessor(
|
||||
name=custom_config["name"],
|
||||
api_url=custom_config["api_url"],
|
||||
api_key=custom_config.get("api_key"),
|
||||
timeout=custom_config["timeout"],
|
||||
supported_types=custom_config["supported_types"],
|
||||
)
|
||||
registry.register(processor, priority=1)
|
||||
logger.info(
|
||||
f"Registered Custom processor '{custom_config['name']}': {custom_config['api_url']}"
|
||||
)
|
||||
registered_count += 1
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to register Custom processor: {e}")
|
||||
|
||||
if registered_count > 0:
|
||||
logger.info(
|
||||
f"Document processing initialized with {registered_count} processor(s): "
|
||||
f"{', '.join(registry.list_processors())}"
|
||||
)
|
||||
else:
|
||||
logger.warning("Document processing enabled but no processors registered")
|
||||
|
||||
|
||||
def validate_pkce_support(discovery: dict, discovery_url: str) -> None:
|
||||
"""
|
||||
Validate that the OIDC provider properly advertises PKCE support.
|
||||
@@ -192,7 +284,15 @@ async def load_oauth_client_credentials(
|
||||
redirect_uris = [f"{mcp_server_url}/oauth/callback"]
|
||||
|
||||
# Get scopes from environment or use defaults
|
||||
# Default: all app-specific read/write scopes
|
||||
# Note: Client registration happens BEFORE tools are registered, so we can't
|
||||
# dynamically discover scopes here. These scopes define the "maximum allowed"
|
||||
# scopes for this OAuth client. The actual per-tool scope enforcement happens
|
||||
# via @require_scopes decorators, and the PRM endpoint advertises the actual
|
||||
# supported scopes dynamically.
|
||||
#
|
||||
# IMPORTANT: Keep this list in sync with all @require_scopes decorators
|
||||
# when adding new apps, or set NEXTCLOUD_OIDC_SCOPES environment variable
|
||||
# to override.
|
||||
default_scopes = (
|
||||
"openid profile email "
|
||||
"notes:read notes:write "
|
||||
@@ -257,6 +357,9 @@ async def app_lifespan_basic(server: FastMCP) -> AsyncIterator[AppContext]:
|
||||
client = NextcloudClient.from_env()
|
||||
logger.info("Client initialization complete")
|
||||
|
||||
# Initialize document processors
|
||||
initialize_document_processors()
|
||||
|
||||
try:
|
||||
yield AppContext(client=client)
|
||||
finally:
|
||||
@@ -317,6 +420,9 @@ async def app_lifespan_oauth(server: FastMCP) -> AsyncIterator[OAuthAppContext]:
|
||||
|
||||
logger.info("OAuth initialization complete")
|
||||
|
||||
# Initialize document processors
|
||||
initialize_document_processors()
|
||||
|
||||
try:
|
||||
yield OAuthAppContext(
|
||||
nextcloud_host=nextcloud_host, token_verifier=token_verifier
|
||||
@@ -542,12 +648,79 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
await stack.enter_async_context(mcp.session_manager.run())
|
||||
yield
|
||||
|
||||
# Health check endpoints for Kubernetes probes
|
||||
def health_live(request):
|
||||
"""Liveness probe endpoint.
|
||||
|
||||
Returns 200 OK if the application process is running.
|
||||
This is a simple check that doesn't verify external dependencies.
|
||||
"""
|
||||
return JSONResponse(
|
||||
{
|
||||
"status": "alive",
|
||||
"mode": "oauth" if oauth_enabled else "basic",
|
||||
}
|
||||
)
|
||||
|
||||
async def health_ready(request):
|
||||
"""Readiness probe endpoint.
|
||||
|
||||
Returns 200 OK if the application is ready to serve traffic.
|
||||
Checks that required configuration is present.
|
||||
"""
|
||||
checks = {}
|
||||
is_ready = True
|
||||
|
||||
# Check Nextcloud host configuration
|
||||
nextcloud_host = os.getenv("NEXTCLOUD_HOST")
|
||||
if nextcloud_host:
|
||||
checks["nextcloud_configured"] = "ok"
|
||||
else:
|
||||
checks["nextcloud_configured"] = "error: NEXTCLOUD_HOST not set"
|
||||
is_ready = False
|
||||
|
||||
# Check authentication configuration
|
||||
if oauth_enabled:
|
||||
# OAuth mode - just verify we got this far (token_verifier initialized in lifespan)
|
||||
checks["auth_mode"] = "oauth"
|
||||
checks["auth_configured"] = "ok"
|
||||
else:
|
||||
# BasicAuth mode - verify credentials are set
|
||||
username = os.getenv("NEXTCLOUD_USERNAME")
|
||||
password = os.getenv("NEXTCLOUD_PASSWORD")
|
||||
if username and password:
|
||||
checks["auth_mode"] = "basic"
|
||||
checks["auth_configured"] = "ok"
|
||||
else:
|
||||
checks["auth_mode"] = "basic"
|
||||
checks["auth_configured"] = "error: credentials not set"
|
||||
is_ready = False
|
||||
|
||||
status_code = 200 if is_ready else 503
|
||||
return JSONResponse(
|
||||
{
|
||||
"status": "ready" if is_ready else "not_ready",
|
||||
"checks": checks,
|
||||
},
|
||||
status_code=status_code,
|
||||
)
|
||||
|
||||
# Add Protected Resource Metadata (PRM) endpoint for OAuth mode
|
||||
routes = []
|
||||
|
||||
# Add health check routes (available in both OAuth and BasicAuth modes)
|
||||
routes.append(Route("/health/live", health_live, methods=["GET"]))
|
||||
routes.append(Route("/health/ready", health_ready, methods=["GET"]))
|
||||
logger.info("Health check endpoints enabled: /health/live, /health/ready")
|
||||
|
||||
if oauth_enabled:
|
||||
|
||||
def oauth_protected_resource_metadata(request):
|
||||
"""RFC 9728 Protected Resource Metadata endpoint."""
|
||||
"""RFC 9728 Protected Resource Metadata endpoint.
|
||||
|
||||
Dynamically discovers supported scopes from registered MCP tools.
|
||||
This ensures the advertised scopes always match the actual tool requirements.
|
||||
"""
|
||||
mcp_server_url = os.getenv(
|
||||
"NEXTCLOUD_MCP_SERVER_URL", "http://localhost:8000"
|
||||
)
|
||||
@@ -561,30 +734,14 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
# Fallback to NEXTCLOUD_HOST if PUBLIC_ISSUER_URL not set
|
||||
public_issuer_url = os.getenv("NEXTCLOUD_HOST", "")
|
||||
|
||||
# Dynamically discover all scopes from registered tools
|
||||
# This provides a single source of truth based on @require_scopes decorators
|
||||
supported_scopes = discover_all_scopes(mcp)
|
||||
|
||||
return JSONResponse(
|
||||
{
|
||||
"resource": resource_url,
|
||||
"scopes_supported": [
|
||||
"openid",
|
||||
"notes:read",
|
||||
"notes:write",
|
||||
"calendar:read",
|
||||
"calendar:write",
|
||||
"todo:read",
|
||||
"todo:write",
|
||||
"contacts:read",
|
||||
"contacts:write",
|
||||
"cookbook:read",
|
||||
"cookbook:write",
|
||||
"deck:read",
|
||||
"deck:write",
|
||||
"tables:read",
|
||||
"tables:write",
|
||||
"files:read",
|
||||
"files:write",
|
||||
"sharing:read",
|
||||
"sharing:write",
|
||||
],
|
||||
"scopes_supported": supported_scopes,
|
||||
"authorization_servers": [public_issuer_url],
|
||||
"bearer_methods_supported": ["header"],
|
||||
"resource_signing_alg_values_supported": ["RS256"],
|
||||
@@ -735,9 +892,9 @@ def get_app(transport: str = "sse", enabled_apps: list[str] | None = None):
|
||||
@click.option(
|
||||
"--oauth-scopes",
|
||||
envvar="NEXTCLOUD_OIDC_SCOPES",
|
||||
default="openid profile email notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write",
|
||||
default="openid profile email notes:read notes:write calendar:read calendar:write todo:read todo:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write",
|
||||
show_default=True,
|
||||
help="OAuth scopes to request (can also use NEXTCLOUD_OIDC_SCOPES env var)",
|
||||
help="OAuth scopes to request during client registration. These define the maximum allowed scopes for the client. Note: Actual supported scopes are discovered dynamically from MCP tools at runtime. (can also use NEXTCLOUD_OIDC_SCOPES env var)",
|
||||
)
|
||||
@click.option(
|
||||
"--oauth-token-type",
|
||||
|
||||
@@ -7,6 +7,7 @@ from .scope_authorization import (
|
||||
InsufficientScopeError,
|
||||
ScopeAuthorizationError,
|
||||
check_scopes,
|
||||
discover_all_scopes,
|
||||
get_access_token_scopes,
|
||||
get_required_scopes,
|
||||
has_required_scopes,
|
||||
@@ -25,6 +26,7 @@ __all__ = [
|
||||
"ScopeAuthorizationError",
|
||||
"InsufficientScopeError",
|
||||
"check_scopes",
|
||||
"discover_all_scopes",
|
||||
"get_access_token_scopes",
|
||||
"get_required_scopes",
|
||||
"has_required_scopes",
|
||||
|
||||
@@ -276,3 +276,68 @@ def has_required_scopes(func: Callable, user_scopes: set[str]) -> bool:
|
||||
|
||||
# Check if user has all required scopes
|
||||
return set(required).issubset(user_scopes)
|
||||
|
||||
|
||||
def discover_all_scopes(mcp) -> list[str]:
|
||||
"""
|
||||
Dynamically discover all OAuth scopes required by registered MCP tools.
|
||||
|
||||
This function inspects all registered tools and extracts their required scopes
|
||||
from the @require_scopes decorator metadata. It provides a single source of truth
|
||||
for available scopes based on the actual tool implementations.
|
||||
|
||||
Args:
|
||||
mcp: FastMCP instance with registered tools
|
||||
|
||||
Returns:
|
||||
Sorted list of unique scope strings, including base OIDC scopes
|
||||
|
||||
Example:
|
||||
```python
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
mcp = FastMCP("My Server")
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("notes:read")
|
||||
async def get_notes():
|
||||
pass
|
||||
|
||||
@mcp.tool()
|
||||
@require_scopes("notes:write")
|
||||
async def create_note():
|
||||
pass
|
||||
|
||||
scopes = discover_all_scopes(mcp)
|
||||
# Returns: ["notes:read", "notes:write", "openid", "profile", "email"]
|
||||
```
|
||||
|
||||
Note:
|
||||
- Base OIDC scopes (openid, profile, email) are always included
|
||||
- Scopes are deduplicated and sorted alphabetically
|
||||
- Only scopes from decorated tools are included
|
||||
- Must be called after tools are registered
|
||||
"""
|
||||
# Start with base OIDC scopes that are always required
|
||||
all_scopes = {"openid", "profile", "email"}
|
||||
|
||||
# Get all registered tools
|
||||
try:
|
||||
tools = mcp._tool_manager.list_tools()
|
||||
except AttributeError:
|
||||
logger.warning("FastMCP instance does not have _tool_manager attribute")
|
||||
return sorted(all_scopes)
|
||||
|
||||
# Extract scopes from each tool
|
||||
for tool in tools:
|
||||
# Get the original function (tools have a .fn attribute)
|
||||
func = getattr(tool, "fn", None)
|
||||
if func is None:
|
||||
continue
|
||||
|
||||
# Extract scopes using existing helper
|
||||
tool_scopes = get_required_scopes(func)
|
||||
all_scopes.update(tool_scopes)
|
||||
|
||||
# Return sorted list of unique scopes
|
||||
return sorted(all_scopes)
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import logging.config
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
LOGGING_CONFIG = {
|
||||
"version": 1,
|
||||
@@ -51,3 +53,68 @@ LOGGING_CONFIG = {
|
||||
|
||||
def setup_logging():
|
||||
logging.config.dictConfig(LOGGING_CONFIG)
|
||||
|
||||
|
||||
# Document Processing Configuration
|
||||
|
||||
|
||||
def get_document_processor_config() -> dict[str, Any]:
|
||||
"""Get document processor configuration from environment.
|
||||
|
||||
Returns:
|
||||
Dict with processor configs:
|
||||
{
|
||||
"enabled": bool,
|
||||
"default_processor": str,
|
||||
"processors": {
|
||||
"unstructured": {...},
|
||||
"tesseract": {...},
|
||||
"custom": {...},
|
||||
}
|
||||
}
|
||||
"""
|
||||
config: dict[str, Any] = {
|
||||
"enabled": os.getenv("ENABLE_DOCUMENT_PROCESSING", "false").lower() == "true",
|
||||
"default_processor": os.getenv("DOCUMENT_PROCESSOR", "unstructured"),
|
||||
"processors": {},
|
||||
}
|
||||
|
||||
# Unstructured configuration
|
||||
if os.getenv("ENABLE_UNSTRUCTURED", "false").lower() == "true":
|
||||
config["processors"]["unstructured"] = {
|
||||
"api_url": os.getenv("UNSTRUCTURED_API_URL", "http://unstructured:8000"),
|
||||
"timeout": int(os.getenv("UNSTRUCTURED_TIMEOUT", "120")),
|
||||
"strategy": os.getenv("UNSTRUCTURED_STRATEGY", "auto"),
|
||||
"languages": [
|
||||
lang.strip()
|
||||
for lang in os.getenv("UNSTRUCTURED_LANGUAGES", "eng,deu").split(",")
|
||||
if lang.strip()
|
||||
],
|
||||
"progress_interval": int(os.getenv("PROGRESS_INTERVAL", "10")),
|
||||
}
|
||||
|
||||
# Tesseract configuration
|
||||
if os.getenv("ENABLE_TESSERACT", "false").lower() == "true":
|
||||
config["processors"]["tesseract"] = {
|
||||
"tesseract_cmd": os.getenv("TESSERACT_CMD"), # None = auto-detect
|
||||
"lang": os.getenv("TESSERACT_LANG", "eng"),
|
||||
}
|
||||
|
||||
# Custom processor (via HTTP API)
|
||||
if os.getenv("ENABLE_CUSTOM_PROCESSOR", "false").lower() == "true":
|
||||
custom_url = os.getenv("CUSTOM_PROCESSOR_URL")
|
||||
if custom_url:
|
||||
supported_types_str = os.getenv("CUSTOM_PROCESSOR_TYPES", "application/pdf")
|
||||
supported_types = {
|
||||
t.strip() for t in supported_types_str.split(",") if t.strip()
|
||||
}
|
||||
|
||||
config["processors"]["custom"] = {
|
||||
"name": os.getenv("CUSTOM_PROCESSOR_NAME", "custom"),
|
||||
"api_url": custom_url,
|
||||
"api_key": os.getenv("CUSTOM_PROCESSOR_API_KEY"),
|
||||
"timeout": int(os.getenv("CUSTOM_PROCESSOR_TIMEOUT", "60")),
|
||||
"supported_types": supported_types,
|
||||
}
|
||||
|
||||
return config
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
"""Document processing plugins for extracting text from various file formats."""
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
from .registry import ProcessorRegistry, get_registry
|
||||
|
||||
__all__ = [
|
||||
"DocumentProcessor",
|
||||
"ProcessingResult",
|
||||
"ProcessorError",
|
||||
"ProcessorRegistry",
|
||||
"get_registry",
|
||||
]
|
||||
@@ -0,0 +1,126 @@
|
||||
"""Abstract base class for document processing plugins."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ProcessingResult(BaseModel):
|
||||
"""Standardized result from any document processor."""
|
||||
|
||||
text: str
|
||||
"""Extracted text content"""
|
||||
|
||||
metadata: dict[str, Any]
|
||||
"""Processor-specific metadata"""
|
||||
|
||||
processor: str
|
||||
"""Name of processor that handled this (e.g., 'unstructured', 'tesseract')"""
|
||||
|
||||
success: bool = True
|
||||
"""Whether processing succeeded"""
|
||||
|
||||
error: Optional[str] = None
|
||||
"""Error message if processing failed"""
|
||||
|
||||
|
||||
class DocumentProcessor(ABC):
|
||||
"""Abstract base class for document processing plugins.
|
||||
|
||||
Document processors extract text from various file formats (PDF, DOCX, images, etc.).
|
||||
Each processor implements this interface and can be registered with the ProcessorRegistry.
|
||||
|
||||
Example:
|
||||
class MyProcessor(DocumentProcessor):
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "my_processor"
|
||||
|
||||
@property
|
||||
def supported_mime_types(self) -> set[str]:
|
||||
return {"application/pdf", "image/jpeg"}
|
||||
|
||||
async def process(self, content: bytes, content_type: str, **kwargs) -> ProcessingResult:
|
||||
# Extract text from content
|
||||
return ProcessingResult(text="...", metadata={}, processor=self.name)
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
return True
|
||||
"""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Unique identifier for this processor (e.g., 'unstructured', 'tesseract')."""
|
||||
pass
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def supported_mime_types(self) -> set[str]:
|
||||
"""Set of MIME types this processor can handle.
|
||||
|
||||
Examples: {"application/pdf", "image/jpeg", "image/png"}
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def process(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str] = None,
|
||||
options: Optional[dict[str, Any]] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> ProcessingResult:
|
||||
"""Process a document and extract text.
|
||||
|
||||
Args:
|
||||
content: Document bytes
|
||||
content_type: MIME type of the document
|
||||
filename: Optional filename for format detection
|
||||
options: Processor-specific options (e.g., OCR language, strategy)
|
||||
progress_callback: Optional async callback for progress updates.
|
||||
Called as: await progress_callback(progress, total, message)
|
||||
- progress: Current progress value (monotonically increasing)
|
||||
- total: Optional total value (None if unknown)
|
||||
- message: Optional human-readable status message
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If processing fails
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
async def health_check(self) -> bool:
|
||||
"""Check if processor is available and healthy.
|
||||
|
||||
Returns:
|
||||
True if processor is ready to use, False otherwise
|
||||
"""
|
||||
pass
|
||||
|
||||
def supports(self, content_type: str) -> bool:
|
||||
"""Check if this processor supports the given MIME type.
|
||||
|
||||
Args:
|
||||
content_type: MIME type (may include parameters like "application/pdf; charset=utf-8")
|
||||
|
||||
Returns:
|
||||
True if this processor can handle the type
|
||||
"""
|
||||
# Strip parameters from content type
|
||||
base_type = content_type.split(";")[0].strip().lower()
|
||||
return base_type in self.supported_mime_types
|
||||
|
||||
|
||||
class ProcessorError(Exception):
|
||||
"""Raised when document processing fails."""
|
||||
|
||||
pass
|
||||
@@ -0,0 +1,150 @@
|
||||
"""Generic HTTP API processor wrapper for custom document processing services."""
|
||||
|
||||
import logging
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class CustomHTTPProcessor(DocumentProcessor):
|
||||
"""Generic HTTP API processor wrapper.
|
||||
|
||||
Allows integration with any custom document processing API that follows
|
||||
a simple request/response pattern. This makes it easy to integrate your
|
||||
own text extraction services without writing a full processor.
|
||||
|
||||
Expected API Contract:
|
||||
- POST request with file as multipart/form-data
|
||||
- Response: {"text": "extracted text", "metadata": {...}}
|
||||
|
||||
Example:
|
||||
processor = CustomHTTPProcessor(
|
||||
name="my_ocr",
|
||||
api_url="https://my-ocr-service.com/process",
|
||||
api_key="secret",
|
||||
supported_types={"application/pdf", "image/jpeg"},
|
||||
)
|
||||
result = await processor.process(pdf_bytes, "application/pdf")
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_url: str,
|
||||
api_key: Optional[str] = None,
|
||||
timeout: int = 60,
|
||||
supported_types: Optional[set[str]] = None,
|
||||
name: str = "custom",
|
||||
):
|
||||
"""Initialize custom HTTP processor.
|
||||
|
||||
Args:
|
||||
api_url: Your API endpoint (should accept POST with multipart/form-data)
|
||||
api_key: Optional API key for authentication (sent as Bearer token)
|
||||
timeout: Request timeout in seconds (default: 60)
|
||||
supported_types: MIME types your API supports
|
||||
name: Unique name for this processor (default: "custom")
|
||||
"""
|
||||
self.api_url = api_url
|
||||
self.api_key = api_key
|
||||
self.timeout = timeout
|
||||
self._name = name
|
||||
self._supported_types = supported_types or set()
|
||||
|
||||
logger.info(f"Initialized CustomHTTPProcessor: {name} -> {api_url}")
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return self._name
|
||||
|
||||
@property
|
||||
def supported_mime_types(self) -> set[str]:
|
||||
return self._supported_types
|
||||
|
||||
async def process(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str] = None,
|
||||
options: Optional[dict[str, Any]] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> ProcessingResult:
|
||||
"""Process via custom HTTP API.
|
||||
|
||||
Args:
|
||||
content: Document bytes
|
||||
content_type: MIME type
|
||||
filename: Optional filename
|
||||
options: Custom options (passed as form data to API)
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If API call fails
|
||||
"""
|
||||
options = options or {}
|
||||
|
||||
# Prepare request
|
||||
files = {"file": (filename or "document", content, content_type)}
|
||||
headers = {}
|
||||
|
||||
if self.api_key:
|
||||
headers["Authorization"] = f"Bearer {self.api_key}"
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.post(
|
||||
self.api_url,
|
||||
files=files,
|
||||
headers=headers,
|
||||
data=options, # Pass options as form data
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse response
|
||||
result = response.json()
|
||||
text = result.get("text", "")
|
||||
metadata = result.get("metadata", {})
|
||||
|
||||
logger.debug(
|
||||
f"Custom processor '{self.name}' extracted {len(text)} characters"
|
||||
)
|
||||
|
||||
return ProcessingResult(
|
||||
text=text,
|
||||
metadata=metadata,
|
||||
processor=self.name,
|
||||
success=True,
|
||||
)
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(f"Custom processor '{self.name}' HTTP error: {e}")
|
||||
raise ProcessorError(f"API call failed: {str(e)}") from e
|
||||
except Exception as e:
|
||||
logger.error(f"Custom processor '{self.name}' failed: {e}")
|
||||
raise ProcessorError(f"Processing failed: {str(e)}") from e
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""Check if custom API is available.
|
||||
|
||||
Returns:
|
||||
True if API responds with status < 500
|
||||
"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5) as client:
|
||||
# Try GET request to check availability
|
||||
response = await client.get(
|
||||
self.api_url,
|
||||
headers={"User-Agent": "nextcloud-mcp-server"},
|
||||
)
|
||||
return response.status_code < 500
|
||||
except Exception as e:
|
||||
logger.warning(f"Custom processor '{self.name}' health check failed: {e}")
|
||||
return False
|
||||
@@ -0,0 +1,171 @@
|
||||
"""Central registry for document processors."""
|
||||
|
||||
import logging
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ProcessorRegistry:
|
||||
"""Central registry for document processors.
|
||||
|
||||
Manages registration and routing of document processing requests to
|
||||
appropriate processors based on MIME types and priorities.
|
||||
|
||||
Example:
|
||||
registry = ProcessorRegistry()
|
||||
registry.register(UnstructuredProcessor(...), priority=10)
|
||||
registry.register(TesseractProcessor(...), priority=5)
|
||||
|
||||
# Auto-select processor based on MIME type
|
||||
result = await registry.process(pdf_bytes, "application/pdf")
|
||||
|
||||
# Force specific processor
|
||||
result = await registry.process(img_bytes, "image/png", processor_name="tesseract")
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._processors: dict[str, tuple[DocumentProcessor, int]] = {}
|
||||
self._priority_order: list[str] = []
|
||||
|
||||
def register(self, processor: DocumentProcessor, priority: int = 0):
|
||||
"""Register a document processor.
|
||||
|
||||
Args:
|
||||
processor: Processor instance to register
|
||||
priority: Higher priority processors are tried first (default: 0)
|
||||
"""
|
||||
name = processor.name
|
||||
|
||||
if name in self._processors:
|
||||
logger.warning(f"Processor '{name}' already registered, replacing")
|
||||
|
||||
self._processors[name] = (processor, priority)
|
||||
|
||||
# Update priority order
|
||||
if name in self._priority_order:
|
||||
self._priority_order.remove(name)
|
||||
|
||||
# Insert in priority order (higher priority first)
|
||||
inserted = False
|
||||
for i, existing_name in enumerate(self._priority_order):
|
||||
existing_priority = self._processors[existing_name][1]
|
||||
if priority > existing_priority:
|
||||
self._priority_order.insert(i, name)
|
||||
inserted = True
|
||||
break
|
||||
|
||||
if not inserted:
|
||||
self._priority_order.append(name)
|
||||
|
||||
logger.info(
|
||||
f"Registered processor: {name} "
|
||||
f"(priority={priority}, supports={len(processor.supported_mime_types)} types)"
|
||||
)
|
||||
|
||||
def get_processor(self, name: str) -> Optional[DocumentProcessor]:
|
||||
"""Get a processor by name.
|
||||
|
||||
Args:
|
||||
name: Processor name
|
||||
|
||||
Returns:
|
||||
DocumentProcessor instance or None if not found
|
||||
"""
|
||||
if name in self._processors:
|
||||
return self._processors[name][0]
|
||||
return None
|
||||
|
||||
def find_processor(self, content_type: str) -> Optional[DocumentProcessor]:
|
||||
"""Find the first processor that supports the given MIME type.
|
||||
|
||||
Processors are checked in priority order (highest priority first).
|
||||
|
||||
Args:
|
||||
content_type: MIME type to match
|
||||
|
||||
Returns:
|
||||
First matching processor or None
|
||||
"""
|
||||
for name in self._priority_order:
|
||||
processor = self._processors[name][0]
|
||||
if processor.supports(content_type):
|
||||
logger.debug(f"Found processor '{name}' for type '{content_type}'")
|
||||
return processor
|
||||
|
||||
logger.debug(f"No processor found for type '{content_type}'")
|
||||
return None
|
||||
|
||||
def list_processors(self) -> list[str]:
|
||||
"""List all registered processor names in priority order.
|
||||
|
||||
Returns:
|
||||
List of processor names (highest priority first)
|
||||
"""
|
||||
return list(self._priority_order)
|
||||
|
||||
async def process(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str] = None,
|
||||
processor_name: Optional[str] = None,
|
||||
options: Optional[dict[str, Any]] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> ProcessingResult:
|
||||
"""Process a document using available processors.
|
||||
|
||||
Args:
|
||||
content: Document bytes
|
||||
content_type: MIME type
|
||||
filename: Optional filename for format detection
|
||||
processor_name: Force specific processor (or None for auto-select)
|
||||
options: Processing options passed to processor
|
||||
progress_callback: Optional async callback for progress updates
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If no processor found or processing fails
|
||||
"""
|
||||
# Find processor
|
||||
if processor_name:
|
||||
processor = self.get_processor(processor_name)
|
||||
if not processor:
|
||||
raise ProcessorError(
|
||||
f"Processor '{processor_name}' not found. "
|
||||
f"Available: {', '.join(self.list_processors())}"
|
||||
)
|
||||
else:
|
||||
processor = self.find_processor(content_type)
|
||||
if not processor:
|
||||
raise ProcessorError(
|
||||
f"No processor found for type: {content_type}. "
|
||||
f"Registered processors: {', '.join(self.list_processors())}"
|
||||
)
|
||||
|
||||
logger.info(f"Processing with '{processor.name}' processor")
|
||||
|
||||
# Process
|
||||
return await processor.process(
|
||||
content, content_type, filename, options, progress_callback
|
||||
)
|
||||
|
||||
|
||||
# Global registry instance
|
||||
_registry = ProcessorRegistry()
|
||||
|
||||
|
||||
def get_registry() -> ProcessorRegistry:
|
||||
"""Get the global processor registry.
|
||||
|
||||
Returns:
|
||||
Singleton ProcessorRegistry instance
|
||||
"""
|
||||
return _registry
|
||||
@@ -0,0 +1,165 @@
|
||||
"""Document processor using Tesseract OCR (local)."""
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
try:
|
||||
import io
|
||||
|
||||
import pytesseract
|
||||
from PIL import Image
|
||||
|
||||
TESSERACT_AVAILABLE = True
|
||||
except ImportError:
|
||||
TESSERACT_AVAILABLE = False
|
||||
|
||||
|
||||
class TesseractProcessor(DocumentProcessor):
|
||||
"""Document processor using Tesseract OCR (local).
|
||||
|
||||
This processor runs OCR locally using the Tesseract engine, which is
|
||||
faster and more lightweight than cloud-based solutions but requires
|
||||
Tesseract to be installed on the system.
|
||||
|
||||
Requirements:
|
||||
- tesseract binary installed (e.g., apt install tesseract-ocr)
|
||||
- Python packages: pip install pytesseract pillow
|
||||
|
||||
Example:
|
||||
processor = TesseractProcessor(default_lang="eng+deu")
|
||||
result = await processor.process(image_bytes, "image/jpeg")
|
||||
"""
|
||||
|
||||
SUPPORTED_TYPES = {
|
||||
"image/jpeg",
|
||||
"image/png",
|
||||
"image/tiff",
|
||||
"image/bmp",
|
||||
"image/gif",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
tesseract_cmd: Optional[str] = None,
|
||||
default_lang: str = "eng",
|
||||
):
|
||||
"""Initialize Tesseract processor.
|
||||
|
||||
Args:
|
||||
tesseract_cmd: Path to tesseract executable (None = auto-detect)
|
||||
default_lang: Default OCR language (e.g., "eng", "deu", "eng+deu")
|
||||
|
||||
Raises:
|
||||
ProcessorError: If Tesseract or required packages not available
|
||||
"""
|
||||
if not TESSERACT_AVAILABLE:
|
||||
raise ProcessorError(
|
||||
"Tesseract processor requires: pip install pytesseract pillow"
|
||||
)
|
||||
|
||||
if tesseract_cmd:
|
||||
pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
|
||||
elif not shutil.which("tesseract"):
|
||||
raise ProcessorError(
|
||||
"Tesseract not found in PATH. Install with: apt install tesseract-ocr"
|
||||
)
|
||||
|
||||
self.default_lang = default_lang
|
||||
logger.info(f"Initialized TesseractProcessor: lang={default_lang}")
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "tesseract"
|
||||
|
||||
@property
|
||||
def supported_mime_types(self) -> set[str]:
|
||||
return self.SUPPORTED_TYPES
|
||||
|
||||
async def process(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str] = None,
|
||||
options: Optional[dict[str, Any]] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> ProcessingResult:
|
||||
"""Process image via Tesseract OCR.
|
||||
|
||||
Args:
|
||||
content: Image bytes
|
||||
content_type: Image MIME type
|
||||
filename: Optional filename
|
||||
options: Processing options:
|
||||
- lang: OCR language(s) (default: from init)
|
||||
- config: Tesseract config string
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If OCR fails
|
||||
"""
|
||||
options = options or {}
|
||||
lang = options.get("lang", self.default_lang)
|
||||
config = options.get("config", "")
|
||||
|
||||
try:
|
||||
# Load image
|
||||
image = Image.open(io.BytesIO(content))
|
||||
|
||||
# Run OCR
|
||||
text = pytesseract.image_to_string(image, lang=lang, config=config)
|
||||
|
||||
# Get additional data for confidence scores
|
||||
data = pytesseract.image_to_data(
|
||||
image, lang=lang, output_type=pytesseract.Output.DICT
|
||||
)
|
||||
|
||||
# Calculate average confidence
|
||||
confidences = [c for c in data["conf"] if c != -1]
|
||||
avg_confidence = sum(confidences) / len(confidences) if confidences else 0
|
||||
|
||||
metadata = {
|
||||
"text_length": len(text),
|
||||
"language": lang,
|
||||
"image_size": image.size,
|
||||
"image_mode": image.mode,
|
||||
"confidence": round(avg_confidence, 2),
|
||||
"words_detected": len([c for c in data["conf"] if c != -1]),
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
f"Tesseract OCR completed: {len(text)} chars, "
|
||||
f"confidence={avg_confidence:.1f}%"
|
||||
)
|
||||
|
||||
return ProcessingResult(
|
||||
text=text.strip(),
|
||||
metadata=metadata,
|
||||
processor=self.name,
|
||||
success=True,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Tesseract processing failed: {e}")
|
||||
raise ProcessorError(f"OCR failed: {str(e)}") from e
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""Check if Tesseract is available.
|
||||
|
||||
Returns:
|
||||
True if Tesseract is installed and working
|
||||
"""
|
||||
try:
|
||||
pytesseract.get_tesseract_version()
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
@@ -0,0 +1,310 @@
|
||||
"""Document processor using Unstructured.io API."""
|
||||
|
||||
import io
|
||||
import logging
|
||||
import time
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
import anyio
|
||||
import httpx
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UnstructuredProcessor(DocumentProcessor):
|
||||
"""Document processor using Unstructured.io API.
|
||||
|
||||
The Unstructured API provides document parsing capabilities for various formats
|
||||
including PDF, DOCX, images with OCR, and more.
|
||||
|
||||
API Documentation: https://docs.unstructured.io/api-reference/api-services/api-parameters
|
||||
"""
|
||||
|
||||
# Supported MIME types for Unstructured
|
||||
SUPPORTED_TYPES = {
|
||||
"application/pdf",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
"application/msword",
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
"application/vnd.ms-powerpoint",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
"application/vnd.ms-excel",
|
||||
"application/rtf",
|
||||
"text/rtf",
|
||||
"application/vnd.oasis.opendocument.text",
|
||||
"application/epub+zip",
|
||||
"message/rfc822",
|
||||
"application/vnd.ms-outlook",
|
||||
"image/jpeg",
|
||||
"image/png",
|
||||
"image/tiff",
|
||||
"image/bmp",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_url: str,
|
||||
timeout: int = 120,
|
||||
default_strategy: str = "auto",
|
||||
default_languages: Optional[list[str]] = None,
|
||||
progress_interval: int = 10,
|
||||
):
|
||||
"""Initialize Unstructured processor.
|
||||
|
||||
Args:
|
||||
api_url: Unstructured API endpoint
|
||||
timeout: Request timeout in seconds (default: 120)
|
||||
default_strategy: Default parsing strategy - "auto", "fast", or "hi_res"
|
||||
default_languages: Default OCR language codes (e.g., ["eng", "deu"])
|
||||
progress_interval: Seconds between progress updates (default: 10)
|
||||
"""
|
||||
self.api_url = api_url
|
||||
self.timeout = timeout
|
||||
self.default_strategy = default_strategy
|
||||
self.default_languages = default_languages or ["eng"]
|
||||
self.progress_interval = progress_interval
|
||||
|
||||
logger.info(
|
||||
f"Initialized UnstructuredProcessor: {api_url}, "
|
||||
f"strategy={default_strategy}, languages={self.default_languages}, "
|
||||
f"progress_interval={progress_interval}s"
|
||||
)
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "unstructured"
|
||||
|
||||
@property
|
||||
def supported_mime_types(self) -> set[str]:
|
||||
return self.SUPPORTED_TYPES
|
||||
|
||||
async def _run_progress_poller(
|
||||
self,
|
||||
stop_event: anyio.Event,
|
||||
progress_callback: Callable[
|
||||
[float, Optional[float], Optional[str]], Awaitable[None]
|
||||
],
|
||||
start_time: float,
|
||||
):
|
||||
"""Run progress poller that reports status every N seconds.
|
||||
|
||||
Args:
|
||||
stop_event: Event to signal when processing is complete
|
||||
progress_callback: Async callback to report progress
|
||||
start_time: Time when processing started (from time.time())
|
||||
"""
|
||||
logger.debug("Starting progress poller")
|
||||
while not stop_event.is_set():
|
||||
try:
|
||||
# Wait for the event to be set, with a timeout equal to progress_interval
|
||||
with anyio.fail_after(self.progress_interval):
|
||||
await stop_event.wait()
|
||||
# If wait() finished, the event was set (processing complete)
|
||||
break
|
||||
except TimeoutError:
|
||||
# Timeout occurred - time to send a progress update
|
||||
if not stop_event.is_set(): # Double-check in case of race condition
|
||||
elapsed = int(time.time() - start_time)
|
||||
message = (
|
||||
f"Processing document with unstructured... ({elapsed}s elapsed)"
|
||||
)
|
||||
try:
|
||||
await progress_callback(
|
||||
progress=float(elapsed),
|
||||
total=None, # Unknown total duration
|
||||
message=message,
|
||||
)
|
||||
logger.debug(f"Progress update sent: {elapsed}s elapsed")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to send progress update: {e}")
|
||||
logger.debug("Progress poller stopped")
|
||||
|
||||
async def _make_api_request(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str],
|
||||
strategy: str,
|
||||
languages: list[str],
|
||||
extract_image_block_types: Optional[list[str]],
|
||||
) -> ProcessingResult:
|
||||
"""Make the actual API request to Unstructured.
|
||||
|
||||
Args:
|
||||
content: Document bytes
|
||||
content_type: MIME type
|
||||
filename: Optional filename
|
||||
strategy: Processing strategy
|
||||
languages: OCR languages
|
||||
extract_image_block_types: Image element types to extract
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If processing fails
|
||||
"""
|
||||
# Prepare multipart request
|
||||
files = {
|
||||
"files": (
|
||||
filename or "document",
|
||||
io.BytesIO(content),
|
||||
content_type or "application/octet-stream",
|
||||
)
|
||||
}
|
||||
|
||||
data = {
|
||||
"strategy": strategy,
|
||||
"languages": ",".join(languages),
|
||||
}
|
||||
|
||||
if extract_image_block_types:
|
||||
data["extract_image_block_types"] = ",".join(extract_image_block_types)
|
||||
|
||||
logger.debug(
|
||||
f"Processing with Unstructured API: strategy={strategy}, languages={languages}"
|
||||
)
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
||||
response = await client.post(
|
||||
f"{self.api_url}/general/v0/general",
|
||||
files=files,
|
||||
data=data,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse response
|
||||
elements = response.json()
|
||||
|
||||
# Extract text and metadata
|
||||
texts = []
|
||||
element_types: dict[str, int] = {}
|
||||
|
||||
for element in elements:
|
||||
if "text" in element and element["text"]:
|
||||
texts.append(element["text"])
|
||||
|
||||
el_type = element.get("type", "unknown")
|
||||
element_types[el_type] = element_types.get(el_type, 0) + 1
|
||||
|
||||
parsed_text = "\n\n".join(texts)
|
||||
|
||||
metadata = {
|
||||
"element_count": len(elements),
|
||||
"text_length": len(parsed_text),
|
||||
"element_types": element_types,
|
||||
"strategy": strategy,
|
||||
"languages": languages,
|
||||
}
|
||||
|
||||
logger.debug(
|
||||
f"Successfully processed: {len(elements)} elements, "
|
||||
f"{len(parsed_text)} characters"
|
||||
)
|
||||
|
||||
return ProcessingResult(
|
||||
text=parsed_text,
|
||||
metadata=metadata,
|
||||
processor=self.name,
|
||||
success=True,
|
||||
)
|
||||
|
||||
except httpx.HTTPError as e:
|
||||
logger.error(f"Unstructured API HTTP error: {e}")
|
||||
raise ProcessorError(f"HTTP error: {str(e)}") from e
|
||||
except Exception as e:
|
||||
logger.error(f"Unstructured API processing failed: {e}")
|
||||
raise ProcessorError(f"Processing failed: {str(e)}") from e
|
||||
|
||||
async def process(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str] = None,
|
||||
options: Optional[dict[str, Any]] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> ProcessingResult:
|
||||
"""Process document via Unstructured API.
|
||||
|
||||
Args:
|
||||
content: Document bytes
|
||||
content_type: MIME type
|
||||
filename: Optional filename for format detection
|
||||
options: Processing options:
|
||||
- strategy: "auto", "fast", or "hi_res" (default: from init)
|
||||
- languages: List of language codes (default: from init)
|
||||
- extract_image_block_types: Types of image elements to extract
|
||||
progress_callback: Optional async callback for progress updates
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If processing fails
|
||||
"""
|
||||
options = options or {}
|
||||
|
||||
# Extract options with defaults
|
||||
strategy = options.get("strategy", self.default_strategy)
|
||||
languages = options.get("languages", self.default_languages)
|
||||
extract_image_block_types = options.get("extract_image_block_types")
|
||||
|
||||
# If no progress callback, just make the request directly
|
||||
if progress_callback is None:
|
||||
return await self._make_api_request(
|
||||
content=content,
|
||||
content_type=content_type,
|
||||
filename=filename,
|
||||
strategy=strategy,
|
||||
languages=languages,
|
||||
extract_image_block_types=extract_image_block_types,
|
||||
)
|
||||
|
||||
# With progress callback: run API request + progress poller concurrently
|
||||
stop_event = anyio.Event()
|
||||
start_time = time.time()
|
||||
result = None
|
||||
|
||||
async def capture_result():
|
||||
nonlocal result
|
||||
try:
|
||||
result = await self._make_api_request(
|
||||
content=content,
|
||||
content_type=content_type,
|
||||
filename=filename,
|
||||
strategy=strategy,
|
||||
languages=languages,
|
||||
extract_image_block_types=extract_image_block_types,
|
||||
)
|
||||
finally:
|
||||
# Signal poller to stop after API request completes
|
||||
stop_event.set()
|
||||
|
||||
# Run both tasks concurrently using anyio task groups
|
||||
async with anyio.create_task_group() as tg:
|
||||
tg.start_soon(capture_result)
|
||||
tg.start_soon(
|
||||
self._run_progress_poller, stop_event, progress_callback, start_time
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""Check if Unstructured API is available.
|
||||
|
||||
Returns:
|
||||
True if API is healthy, False otherwise
|
||||
"""
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=5) as client:
|
||||
response = await client.get(f"{self.api_url}/healthcheck")
|
||||
return response.status_code == 200
|
||||
except Exception as e:
|
||||
logger.warning(f"Unstructured health check failed: {e}")
|
||||
return False
|
||||
@@ -5,6 +5,10 @@ from mcp.server.fastmcp import Context, FastMCP
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
from nextcloud_mcp_server.models import DirectoryListing, FileInfo, SearchFilesResponse
|
||||
from nextcloud_mcp_server.utils.document_parser import (
|
||||
is_parseable_document,
|
||||
parse_document,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -53,12 +57,53 @@ def configure_webdav_tools(mcp: FastMCP):
|
||||
path: Full path to the file to read
|
||||
|
||||
Returns:
|
||||
Dict with path, content, content_type, size, and encoding (if binary)
|
||||
Text files are decoded to UTF-8, binary files are base64 encoded
|
||||
Dict with path, content, content_type, size, and optional parsing metadata
|
||||
- Text files are decoded to UTF-8
|
||||
- Documents (PDF, DOCX, etc.) are parsed and text is extracted
|
||||
- Other binary files are base64 encoded
|
||||
|
||||
Examples:
|
||||
# Read a text file
|
||||
result = await nc_webdav_read_file("Documents/readme.txt")
|
||||
logger.info(result['content']) # Decoded text content
|
||||
|
||||
# Read a PDF document (automatically parsed)
|
||||
result = await nc_webdav_read_file("Documents/report.pdf")
|
||||
logger.info(result['content']) # Extracted text from PDF
|
||||
logger.info(result['parsing_metadata']) # Document parsing info
|
||||
|
||||
# Read a binary file
|
||||
result = await nc_webdav_read_file("Images/photo.jpg")
|
||||
logger.info(result['encoding']) # 'base64'
|
||||
"""
|
||||
client = get_client(ctx)
|
||||
content, content_type = await client.webdav.read_file(path)
|
||||
|
||||
# Check if this is a parseable document (PDF, DOCX, etc.)
|
||||
# is_parseable_document() checks if document processing is enabled
|
||||
if is_parseable_document(content_type):
|
||||
try:
|
||||
logger.info(f"Parsing document '{path}' of type '{content_type}'")
|
||||
parsed_text, metadata = await parse_document(
|
||||
content,
|
||||
content_type,
|
||||
filename=path,
|
||||
progress_callback=ctx.report_progress,
|
||||
)
|
||||
return {
|
||||
"path": path,
|
||||
"content": parsed_text,
|
||||
"content_type": content_type,
|
||||
"size": len(content),
|
||||
"parsed": True,
|
||||
"parsing_metadata": metadata,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to parse document '{path}', falling back to base64: {e}"
|
||||
)
|
||||
# Fall through to base64 encoding on parse failure
|
||||
|
||||
# For text files, decode content for easier viewing
|
||||
if content_type and content_type.startswith("text/"):
|
||||
try:
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
"""Utility functions for the Nextcloud MCP server."""
|
||||
@@ -0,0 +1,100 @@
|
||||
"""Document parsing utilities using pluggable processor registry."""
|
||||
|
||||
import base64
|
||||
import logging
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Optional, Tuple
|
||||
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
from nextcloud_mcp_server.document_processors import (
|
||||
ProcessorError,
|
||||
get_registry,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def is_parseable_document(content_type: Optional[str]) -> bool:
|
||||
"""Check if a document type can be parsed by any registered processor.
|
||||
|
||||
Args:
|
||||
content_type: The MIME type of the document
|
||||
|
||||
Returns:
|
||||
True if any processor can handle this type, False otherwise
|
||||
"""
|
||||
if not content_type:
|
||||
return False
|
||||
|
||||
config = get_document_processor_config()
|
||||
if not config["enabled"]:
|
||||
return False
|
||||
|
||||
registry = get_registry()
|
||||
processor = registry.find_processor(content_type)
|
||||
return processor is not None
|
||||
|
||||
|
||||
async def parse_document(
|
||||
content: bytes,
|
||||
content_type: Optional[str],
|
||||
filename: Optional[str] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> Tuple[str, dict]:
|
||||
"""Parse a document using registered processors.
|
||||
|
||||
This function uses the processor registry to find an appropriate
|
||||
processor for the given document type and extract text from it.
|
||||
|
||||
Args:
|
||||
content: The document content as bytes
|
||||
content_type: The MIME type of the document
|
||||
filename: Optional filename to help with format detection
|
||||
progress_callback: Optional async callback for progress updates during long operations
|
||||
|
||||
Returns:
|
||||
Tuple of (parsed_text, metadata) where:
|
||||
- parsed_text: The extracted text content
|
||||
- metadata: Additional metadata about the parsing
|
||||
|
||||
Raises:
|
||||
ValueError: If the document type is not supported
|
||||
Exception: If parsing fails
|
||||
"""
|
||||
if not content_type:
|
||||
raise ValueError("Content type is required for document parsing")
|
||||
|
||||
config = get_document_processor_config()
|
||||
if not config["enabled"]:
|
||||
raise ValueError("Document processing is disabled")
|
||||
|
||||
registry = get_registry()
|
||||
|
||||
logger.debug(f"Parsing document of type '{content_type}'")
|
||||
|
||||
try:
|
||||
# Process using registry (auto-selects processor based on MIME type)
|
||||
result = await registry.process(
|
||||
content=content,
|
||||
content_type=content_type,
|
||||
filename=filename,
|
||||
progress_callback=progress_callback,
|
||||
)
|
||||
|
||||
logger.info(f"Successfully parsed document with '{result.processor}' processor")
|
||||
|
||||
return result.text, result.metadata
|
||||
|
||||
except ProcessorError as e:
|
||||
logger.error(f"Document processing failed: {e}")
|
||||
# Fallback to base64 with error metadata
|
||||
parsed_text = f"Document could not be parsed. Base64 content: {base64.b64encode(content).decode('ascii')[:200]}..."
|
||||
metadata = {
|
||||
"mime_type": content_type,
|
||||
"text_length": len(parsed_text),
|
||||
"parsing_method": "fallback_base64",
|
||||
"error": str(e),
|
||||
}
|
||||
return parsed_text, metadata
|
||||
+10
-2
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "nextcloud-mcp-server"
|
||||
version = "0.18.0"
|
||||
version = "0.22.7"
|
||||
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
|
||||
authors = [
|
||||
{name = "Chris Coutinho", email = "chris@coutinho.io"}
|
||||
@@ -10,7 +10,7 @@ license = {text = "AGPL-3.0-only"}
|
||||
requires-python = ">=3.11"
|
||||
keywords = ["nextcloud", "mcp", "model-context-protocol", "llm", "ai", "claude", "webdav", "caldav", "carddav"]
|
||||
dependencies = [
|
||||
"mcp[cli] (>=1.18,<1.19)",
|
||||
"mcp[cli] (>=1.19,<1.20)",
|
||||
"httpx (>=0.28.1,<0.29.0)",
|
||||
"pillow (>=12.0.0,<12.1.0)",
|
||||
"icalendar (>=6.0.0,<7.0.0)",
|
||||
@@ -65,6 +65,13 @@ version_scheme = "pep440"
|
||||
version_provider = "uv"
|
||||
update_changelog_on_bump = true
|
||||
major_version_zero = true
|
||||
version_files = [
|
||||
"charts/nextcloud-mcp-server/Chart.yaml:appVersion",
|
||||
"charts/nextcloud-mcp-server/Chart.yaml:version"
|
||||
]
|
||||
ignored_tag_formats = [
|
||||
"nextcloud-mcp-server-*"
|
||||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
extend-select = ["I"]
|
||||
@@ -91,6 +98,7 @@ dev = [
|
||||
"pytest-playwright-asyncio>=0.7.1",
|
||||
"pytest-timeout>=2.3.1",
|
||||
"ruff>=0.11.13",
|
||||
"reportlab>=4.0.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -0,0 +1,143 @@
|
||||
"""Integration tests for document processing with progress notifications."""
|
||||
|
||||
import io
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
class TestDocumentProcessingProgress:
|
||||
"""Test document processing with progress notifications."""
|
||||
|
||||
async def test_unstructured_processor_with_progress_callback(self, nc_client):
|
||||
"""Test that UnstructuredProcessor calls progress callback during processing."""
|
||||
import os
|
||||
|
||||
# Skip if unstructured is not enabled
|
||||
if os.getenv("ENABLE_UNSTRUCTURED", "false").lower() != "true":
|
||||
pytest.skip("Unstructured processor not enabled")
|
||||
|
||||
from nextcloud_mcp_server.document_processors.unstructured import (
|
||||
UnstructuredProcessor,
|
||||
)
|
||||
|
||||
# Track progress callback invocations
|
||||
progress_updates = []
|
||||
|
||||
async def track_progress(progress: float, total: float | None, message: str):
|
||||
progress_updates.append(
|
||||
{"progress": progress, "total": total, "message": message}
|
||||
)
|
||||
|
||||
# Create processor configured to use local unstructured service
|
||||
processor = UnstructuredProcessor(
|
||||
api_url=os.getenv("UNSTRUCTURED_API_URL", "http://unstructured:8000"),
|
||||
timeout=120,
|
||||
progress_interval=2, # 2 second intervals for testing
|
||||
)
|
||||
|
||||
# Create a simple test image (which requires OCR processing)
|
||||
# This should take long enough to trigger at least one progress update
|
||||
img = Image.new("RGB", (400, 200), color=(73, 109, 137))
|
||||
buffer = io.BytesIO()
|
||||
img.save(buffer, format="PNG")
|
||||
test_image = buffer.getvalue()
|
||||
|
||||
# Process with progress callback
|
||||
result = await processor.process(
|
||||
content=test_image,
|
||||
content_type="image/png",
|
||||
filename="test.png",
|
||||
progress_callback=track_progress,
|
||||
)
|
||||
|
||||
# Verify processing succeeded
|
||||
assert result.success is True
|
||||
assert result.processor == "unstructured"
|
||||
assert isinstance(result.text, str)
|
||||
|
||||
# Note: Progress updates may or may not occur depending on processing speed
|
||||
# If updates occurred, verify their structure
|
||||
if progress_updates:
|
||||
for update in progress_updates:
|
||||
assert isinstance(update["progress"], float)
|
||||
assert update["total"] is None # Unknown total
|
||||
assert "Processing document with unstructured" in update["message"]
|
||||
assert "elapsed" in update["message"]
|
||||
|
||||
async def test_webdav_read_file_sends_progress_notifications(
|
||||
self, nc_mcp_client, nc_client
|
||||
):
|
||||
"""Test that reading a document via WebDAV MCP tool sends progress notifications."""
|
||||
import os
|
||||
|
||||
# Skip if document processing is not enabled
|
||||
if os.getenv("ENABLE_DOCUMENT_PROCESSING", "false").lower() != "true":
|
||||
pytest.skip("Document processing not enabled")
|
||||
|
||||
# Create a test image file in Nextcloud via WebDAV
|
||||
from PIL import Image
|
||||
|
||||
img = Image.new("RGB", (400, 200), color=(100, 150, 200))
|
||||
buffer = io.BytesIO()
|
||||
img.save(buffer, format="PNG")
|
||||
test_image = buffer.getvalue()
|
||||
|
||||
# Upload test file
|
||||
test_path = "test_progress.png"
|
||||
await nc_client.webdav.write_file(test_path, test_image, "image/png")
|
||||
|
||||
try:
|
||||
# Read file via MCP tool (which should trigger document processing)
|
||||
# The MCP client will automatically track progress notifications
|
||||
result = await nc_mcp_client.call_tool(
|
||||
"nc_webdav_read_file", arguments={"path": test_path}
|
||||
)
|
||||
|
||||
# Note: FastMCP progress notifications are sent automatically by ctx.report_progress
|
||||
# We can't easily capture them in this test without mocking the MCP transport layer
|
||||
# The important thing is that the code path is exercised without errors
|
||||
assert result.isError is False
|
||||
|
||||
finally:
|
||||
# Cleanup
|
||||
try:
|
||||
await nc_client.webdav.delete_resource(test_path)
|
||||
except Exception:
|
||||
pass # Ignore cleanup errors
|
||||
|
||||
async def test_progress_callback_not_required(self, nc_client):
|
||||
"""Test that processing works without progress callback (backward compatibility)."""
|
||||
import os
|
||||
|
||||
if os.getenv("ENABLE_UNSTRUCTURED", "false").lower() != "true":
|
||||
pytest.skip("Unstructured processor not enabled")
|
||||
|
||||
from nextcloud_mcp_server.document_processors.unstructured import (
|
||||
UnstructuredProcessor,
|
||||
)
|
||||
|
||||
processor = UnstructuredProcessor(
|
||||
api_url=os.getenv("UNSTRUCTURED_API_URL", "http://unstructured:8000"),
|
||||
timeout=120,
|
||||
)
|
||||
|
||||
# Create simple test image
|
||||
img = Image.new("RGB", (200, 100), color=(50, 100, 150))
|
||||
buffer = io.BytesIO()
|
||||
img.save(buffer, format="PNG")
|
||||
test_image = buffer.getvalue()
|
||||
|
||||
# Process WITHOUT progress callback
|
||||
result = await processor.process(
|
||||
content=test_image,
|
||||
content_type="image/png",
|
||||
filename="test.png",
|
||||
progress_callback=None, # Explicitly None
|
||||
)
|
||||
|
||||
# Should still work
|
||||
assert result.success is True
|
||||
assert result.processor == "unstructured"
|
||||
@@ -0,0 +1,155 @@
|
||||
"""Integration tests for Unstructured API functionality."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from io import BytesIO
|
||||
|
||||
import pytest
|
||||
from mcp.client.session import ClientSession
|
||||
from reportlab.lib.pagesizes import letter
|
||||
from reportlab.pdfgen import canvas
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
async def test_base_path(nc_client: NextcloudClient):
|
||||
"""Base path for test files/directories."""
|
||||
test_dir = f"mcp_test_unstructured_{uuid.uuid4().hex[:8]}"
|
||||
await nc_client.webdav.create_directory(test_dir)
|
||||
yield test_dir
|
||||
try:
|
||||
await nc_client.webdav.delete_resource(test_dir)
|
||||
except Exception:
|
||||
pass # Ignore cleanup errors
|
||||
|
||||
|
||||
def create_test_pdf(text: str) -> bytes:
|
||||
"""Create a simple PDF document with the given text."""
|
||||
buffer = BytesIO()
|
||||
c = canvas.Canvas(buffer, pagesize=letter)
|
||||
c.drawString(100, 750, text)
|
||||
c.save()
|
||||
buffer.seek(0)
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
condition=os.getenv("ENABLE_UNSTRUCTURED", "false") != "true",
|
||||
reason="Unstructured is not enabled",
|
||||
)
|
||||
async def test_unstructured_api_enabled_parsing(
|
||||
nc_client: NextcloudClient, test_base_path: str, nc_mcp_client: ClientSession
|
||||
):
|
||||
"""Test that documents are parsed using the Unstructured API when enabled."""
|
||||
test_file = f"{test_base_path}/test_unstructured_pdf.pdf"
|
||||
test_text = "This is a test PDF document for Unstructured API parsing"
|
||||
|
||||
try:
|
||||
# Create a simple PDF
|
||||
pdf_content = create_test_pdf(test_text)
|
||||
|
||||
# Upload the PDF
|
||||
await nc_client.webdav.write_file(
|
||||
test_file, pdf_content, content_type="application/pdf"
|
||||
)
|
||||
logger.info(f"Uploaded PDF file: {test_file}")
|
||||
|
||||
# Read the PDF using MCP tool (should parse via Unstructured API)
|
||||
mcp_result = await nc_mcp_client.call_tool(
|
||||
"nc_webdav_read_file", arguments={"path": test_file}
|
||||
)
|
||||
|
||||
# Extract content from the MCP result
|
||||
if hasattr(mcp_result.content[0], "text"):
|
||||
result_text = mcp_result.content[0].text
|
||||
else:
|
||||
# Fallback for other content types
|
||||
result_text = str(mcp_result.content[0])
|
||||
|
||||
# Parse the JSON response
|
||||
result = json.loads(result_text)
|
||||
|
||||
# Verify the result structure
|
||||
assert "path" in result
|
||||
assert "content" in result
|
||||
assert "content_type" in result
|
||||
assert "parsed" in result # Should be present when parsing succeeds
|
||||
|
||||
# The content should be readable text, not base64
|
||||
content = result["content"]
|
||||
assert isinstance(content, str)
|
||||
assert len(content) > 0
|
||||
assert "test" in content.lower() # Should contain our test text
|
||||
|
||||
# Should have parsing metadata
|
||||
assert "parsing_metadata" in result
|
||||
parsing_metadata = result["parsing_metadata"]
|
||||
assert parsing_metadata["parsing_method"] == "unstructured_api"
|
||||
|
||||
logger.info("Successfully parsed PDF using Unstructured API")
|
||||
|
||||
finally:
|
||||
# Clean up
|
||||
try:
|
||||
await nc_client.webdav.delete_resource(test_file)
|
||||
except Exception:
|
||||
pass # Ignore cleanup errors
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
condition=os.getenv("ENABLE_UNSTRUCTURED", "false") != "true",
|
||||
reason="Unstructured is not enabled",
|
||||
)
|
||||
async def test_unstructured_api_with_docx(
|
||||
nc_client: NextcloudClient, test_base_path: str, nc_mcp_client: ClientSession
|
||||
):
|
||||
"""Test Unstructured API with DOCX files."""
|
||||
test_file = f"{test_base_path}/test_unstructured_docx.docx"
|
||||
try:
|
||||
# Create a simple DOCX-like file for testing purposes
|
||||
# Since we're removing python-docx dependency, we'll create a simple file
|
||||
docx_content = (
|
||||
b"This is a mock DOCX file content for testing Unstructured API parsing"
|
||||
)
|
||||
|
||||
# Upload the file
|
||||
await nc_client.webdav.write_file(
|
||||
test_file,
|
||||
docx_content,
|
||||
content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
)
|
||||
logger.info(f"Uploaded DOCX file: {test_file}")
|
||||
|
||||
# Read the file using MCP tool
|
||||
mcp_result = await nc_mcp_client.call_tool(
|
||||
"nc_webdav_read_file", arguments={"path": test_file}
|
||||
)
|
||||
|
||||
# Extract content from the MCP result
|
||||
if hasattr(mcp_result.content[0], "text"):
|
||||
result_text = mcp_result.content[0].text
|
||||
else:
|
||||
# Fallback for other content types
|
||||
result_text = str(mcp_result.content[0])
|
||||
|
||||
# Parse the JSON response
|
||||
result = json.loads(result_text)
|
||||
|
||||
# Verify the result structure
|
||||
assert "path" in result
|
||||
assert "content" in result
|
||||
assert "content_type" in result
|
||||
|
||||
logger.info("Successfully processed DOCX file with Unstructured API")
|
||||
|
||||
finally:
|
||||
# Clean up
|
||||
try:
|
||||
await nc_client.webdav.delete_resource(test_file)
|
||||
except Exception:
|
||||
pass # Ignore cleanup errors
|
||||
@@ -3,8 +3,8 @@ Tests for Dynamic Client Registration (DCR) token_type parameter.
|
||||
|
||||
These tests verify that the Nextcloud OIDC server properly honors the token_type
|
||||
parameter during client registration, issuing the correct type of access tokens:
|
||||
- token_type="JWT" → JWT-formatted tokens (RFC 9068)
|
||||
- token_type="Bearer" → Opaque tokens (standard OAuth2)
|
||||
- token_type="jwt" → JWT-formatted tokens (RFC 9068)
|
||||
- token_type="opaque" → Opaque tokens (standard OAuth2)
|
||||
|
||||
This is critical for ensuring:
|
||||
1. Client choice is respected by the OIDC server
|
||||
@@ -208,12 +208,14 @@ async def test_dcr_respects_jwt_token_type(
|
||||
oauth_callback_server,
|
||||
):
|
||||
"""
|
||||
Test that DCR honors token_type=JWT and issues JWT-formatted tokens.
|
||||
Test that DCR honors token_type=jwt and issues JWT-formatted tokens.
|
||||
|
||||
This verifies:
|
||||
1. Client registration with token_type="JWT" succeeds
|
||||
1. Client registration with token_type="jwt" succeeds
|
||||
2. Tokens obtained via this client are JWT format (base64.base64.signature)
|
||||
3. JWT payload contains expected claims (sub, iss, scope, etc.)
|
||||
|
||||
Note: The OIDC app uses lowercase 'jwt' (not 'JWT').
|
||||
"""
|
||||
nextcloud_host = os.getenv("NEXTCLOUD_HOST")
|
||||
if not nextcloud_host:
|
||||
@@ -232,15 +234,15 @@ async def test_dcr_respects_jwt_token_type(
|
||||
token_endpoint = oidc_config.get("token_endpoint")
|
||||
authorization_endpoint = oidc_config.get("authorization_endpoint")
|
||||
|
||||
# Register client with token_type="JWT"
|
||||
logger.info("Registering OAuth client with token_type=JWT...")
|
||||
# Register client with token_type="jwt"
|
||||
logger.info("Registering OAuth client with token_type=jwt...")
|
||||
client_info = await register_client(
|
||||
nextcloud_url=nextcloud_host,
|
||||
registration_endpoint=registration_endpoint,
|
||||
client_name="DCR Test - JWT Token Type",
|
||||
redirect_uris=[callback_url],
|
||||
scopes="openid profile email notes:read notes:write",
|
||||
token_type="JWT",
|
||||
token_type="jwt",
|
||||
)
|
||||
|
||||
logger.info(f"Registered JWT client: {client_info.client_id[:16]}...")
|
||||
@@ -278,7 +280,7 @@ async def test_dcr_respects_jwt_token_type(
|
||||
assert "notes:write" in scopes, "JWT scope claim missing notes:write"
|
||||
|
||||
logger.info(
|
||||
f"✅ DCR with token_type=JWT works correctly! "
|
||||
f"✅ DCR with token_type=jwt works correctly! "
|
||||
f"Token is JWT format with scope claim: {payload['scope']}"
|
||||
)
|
||||
|
||||
@@ -290,12 +292,14 @@ async def test_dcr_respects_bearer_token_type(
|
||||
oauth_callback_server,
|
||||
):
|
||||
"""
|
||||
Test that DCR honors token_type=Bearer and issues opaque tokens.
|
||||
Test that DCR honors token_type=opaque and issues opaque tokens.
|
||||
|
||||
This verifies:
|
||||
1. Client registration with token_type="Bearer" succeeds
|
||||
1. Client registration with token_type="opaque" succeeds
|
||||
2. Tokens obtained via this client are opaque (NOT JWT format)
|
||||
3. Opaque tokens are simple strings, not base64-encoded structures
|
||||
|
||||
Note: The OIDC app uses 'opaque' or 'jwt' as token_type values (not 'Bearer').
|
||||
"""
|
||||
nextcloud_host = os.getenv("NEXTCLOUD_HOST")
|
||||
if not nextcloud_host:
|
||||
@@ -314,18 +318,18 @@ async def test_dcr_respects_bearer_token_type(
|
||||
token_endpoint = oidc_config.get("token_endpoint")
|
||||
authorization_endpoint = oidc_config.get("authorization_endpoint")
|
||||
|
||||
# Register client with token_type="Bearer" (opaque tokens)
|
||||
logger.info("Registering OAuth client with token_type=Bearer...")
|
||||
# Register client with token_type="opaque" (opaque tokens)
|
||||
logger.info("Registering OAuth client with token_type=opaque...")
|
||||
client_info = await register_client(
|
||||
nextcloud_url=nextcloud_host,
|
||||
registration_endpoint=registration_endpoint,
|
||||
client_name="DCR Test - Bearer Token Type",
|
||||
client_name="DCR Test - Opaque Token Type",
|
||||
redirect_uris=[callback_url],
|
||||
scopes="openid profile email notes:read notes:write",
|
||||
token_type="Bearer",
|
||||
token_type="opaque",
|
||||
)
|
||||
|
||||
logger.info(f"Registered Bearer client: {client_info.client_id[:16]}...")
|
||||
logger.info(f"Registered Opaque token client: {client_info.client_id[:16]}...")
|
||||
|
||||
# Obtain token via OAuth flow
|
||||
access_token = await get_oauth_token_with_client(
|
||||
@@ -353,7 +357,7 @@ async def test_dcr_respects_bearer_token_type(
|
||||
pass
|
||||
|
||||
logger.info(
|
||||
f"✅ DCR with token_type=Bearer works correctly! "
|
||||
f"✅ DCR with token_type=opaque works correctly! "
|
||||
f"Token is opaque (not JWT format): {access_token[:30]}..."
|
||||
)
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
import pytest
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
|
||||
pytestmark = pytest.mark.integration
|
||||
|
||||
|
||||
async def test_create_and_delete_user(nc_client: NextcloudClient, test_user):
|
||||
"""Test creating a user and verifying deletion (cleanup by fixture)."""
|
||||
|
||||
+11
-3
@@ -253,9 +253,17 @@ def test_default_values(runner, clean_env, monkeypatch):
|
||||
_ = runner.invoke(run, [])
|
||||
|
||||
# Verify default values
|
||||
assert (
|
||||
captured_env["NEXTCLOUD_OIDC_SCOPES"]
|
||||
== "openid profile email notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write"
|
||||
assert captured_env["NEXTCLOUD_OIDC_SCOPES"] == (
|
||||
"openid profile email "
|
||||
"notes:read notes:write "
|
||||
"calendar:read calendar:write "
|
||||
"todo:read todo:write "
|
||||
"contacts:read contacts:write "
|
||||
"cookbook:read cookbook:write "
|
||||
"deck:read deck:write "
|
||||
"tables:read tables:write "
|
||||
"files:read files:write "
|
||||
"sharing:read sharing:write"
|
||||
)
|
||||
assert captured_env["NEXTCLOUD_OIDC_TOKEN_TYPE"] == "bearer"
|
||||
assert captured_env["NEXTCLOUD_MCP_SERVER_URL"] == "http://localhost:8000"
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
"""Unit tests for document processor configuration."""
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
class TestDocumentProcessorConfig:
|
||||
"""Test document processor configuration system."""
|
||||
|
||||
def test_config_disabled_by_default(self):
|
||||
"""Test that document processing is disabled by default."""
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
|
||||
os.environ.pop("ENABLE_DOCUMENT_PROCESSING", None)
|
||||
config = get_document_processor_config()
|
||||
assert config["enabled"] is False
|
||||
|
||||
def test_config_enabled(self):
|
||||
"""Test enabling document processing."""
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
|
||||
os.environ["ENABLE_DOCUMENT_PROCESSING"] = "true"
|
||||
try:
|
||||
config = get_document_processor_config()
|
||||
assert config["enabled"] is True
|
||||
finally:
|
||||
os.environ.pop("ENABLE_DOCUMENT_PROCESSING", None)
|
||||
|
||||
def test_unstructured_processor_config(self):
|
||||
"""Test Unstructured processor configuration."""
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
|
||||
os.environ["ENABLE_UNSTRUCTURED"] = "true"
|
||||
os.environ["UNSTRUCTURED_API_URL"] = "http://test:8000"
|
||||
os.environ["UNSTRUCTURED_STRATEGY"] = "hi_res"
|
||||
os.environ["UNSTRUCTURED_LANGUAGES"] = "eng,fra"
|
||||
os.environ["UNSTRUCTURED_TIMEOUT"] = "60"
|
||||
|
||||
try:
|
||||
config = get_document_processor_config()
|
||||
assert "unstructured" in config["processors"]
|
||||
unst_config = config["processors"]["unstructured"]
|
||||
assert unst_config["api_url"] == "http://test:8000"
|
||||
assert unst_config["strategy"] == "hi_res"
|
||||
assert unst_config["languages"] == ["eng", "fra"]
|
||||
assert unst_config["timeout"] == 60
|
||||
finally:
|
||||
os.environ.pop("ENABLE_UNSTRUCTURED", None)
|
||||
os.environ.pop("UNSTRUCTURED_API_URL", None)
|
||||
os.environ.pop("UNSTRUCTURED_STRATEGY", None)
|
||||
os.environ.pop("UNSTRUCTURED_LANGUAGES", None)
|
||||
os.environ.pop("UNSTRUCTURED_TIMEOUT", None)
|
||||
|
||||
def test_tesseract_processor_config(self):
|
||||
"""Test Tesseract processor configuration."""
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
|
||||
os.environ["ENABLE_TESSERACT"] = "true"
|
||||
os.environ["TESSERACT_LANG"] = "eng+deu"
|
||||
os.environ["TESSERACT_CMD"] = "/usr/local/bin/tesseract"
|
||||
|
||||
try:
|
||||
config = get_document_processor_config()
|
||||
assert "tesseract" in config["processors"]
|
||||
tess_config = config["processors"]["tesseract"]
|
||||
assert tess_config["lang"] == "eng+deu"
|
||||
assert tess_config["tesseract_cmd"] == "/usr/local/bin/tesseract"
|
||||
finally:
|
||||
os.environ.pop("ENABLE_TESSERACT", None)
|
||||
os.environ.pop("TESSERACT_LANG", None)
|
||||
os.environ.pop("TESSERACT_CMD", None)
|
||||
|
||||
def test_custom_processor_config(self):
|
||||
"""Test custom processor configuration."""
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
|
||||
os.environ["ENABLE_CUSTOM_PROCESSOR"] = "true"
|
||||
os.environ["CUSTOM_PROCESSOR_NAME"] = "my_ocr"
|
||||
os.environ["CUSTOM_PROCESSOR_URL"] = "http://localhost:9000/process"
|
||||
os.environ["CUSTOM_PROCESSOR_API_KEY"] = "secret"
|
||||
os.environ["CUSTOM_PROCESSOR_TIMEOUT"] = "30"
|
||||
os.environ["CUSTOM_PROCESSOR_TYPES"] = "application/pdf,image/jpeg"
|
||||
|
||||
try:
|
||||
config = get_document_processor_config()
|
||||
assert "custom" in config["processors"]
|
||||
custom_config = config["processors"]["custom"]
|
||||
assert custom_config["name"] == "my_ocr"
|
||||
assert custom_config["api_url"] == "http://localhost:9000/process"
|
||||
assert custom_config["api_key"] == "secret"
|
||||
assert custom_config["timeout"] == 30
|
||||
assert "application/pdf" in custom_config["supported_types"]
|
||||
assert "image/jpeg" in custom_config["supported_types"]
|
||||
finally:
|
||||
os.environ.pop("ENABLE_CUSTOM_PROCESSOR", None)
|
||||
os.environ.pop("CUSTOM_PROCESSOR_NAME", None)
|
||||
os.environ.pop("CUSTOM_PROCESSOR_URL", None)
|
||||
os.environ.pop("CUSTOM_PROCESSOR_API_KEY", None)
|
||||
os.environ.pop("CUSTOM_PROCESSOR_TIMEOUT", None)
|
||||
os.environ.pop("CUSTOM_PROCESSOR_TYPES", None)
|
||||
|
||||
def test_multiple_processors(self):
|
||||
"""Test configuration with multiple processors enabled."""
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
|
||||
os.environ["ENABLE_DOCUMENT_PROCESSING"] = "true"
|
||||
os.environ["ENABLE_UNSTRUCTURED"] = "true"
|
||||
os.environ["ENABLE_TESSERACT"] = "true"
|
||||
|
||||
try:
|
||||
config = get_document_processor_config()
|
||||
assert config["enabled"] is True
|
||||
assert "unstructured" in config["processors"]
|
||||
assert "tesseract" in config["processors"]
|
||||
finally:
|
||||
os.environ.pop("ENABLE_DOCUMENT_PROCESSING", None)
|
||||
os.environ.pop("ENABLE_UNSTRUCTURED", None)
|
||||
os.environ.pop("ENABLE_TESSERACT", None)
|
||||
|
||||
def test_default_processor_selection(self):
|
||||
"""Test default processor configuration."""
|
||||
from nextcloud_mcp_server.config import get_document_processor_config
|
||||
|
||||
os.environ.pop("DOCUMENT_PROCESSOR", None)
|
||||
config = get_document_processor_config()
|
||||
assert config["default_processor"] == "unstructured"
|
||||
|
||||
os.environ["DOCUMENT_PROCESSOR"] = "tesseract"
|
||||
try:
|
||||
config = get_document_processor_config()
|
||||
assert config["default_processor"] == "tesseract"
|
||||
finally:
|
||||
os.environ.pop("DOCUMENT_PROCESSOR", None)
|
||||
@@ -0,0 +1,164 @@
|
||||
"""Unit tests for progress notification system."""
|
||||
|
||||
import time
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import anyio
|
||||
import pytest
|
||||
|
||||
pytestmark = pytest.mark.unit
|
||||
|
||||
|
||||
class TestProgressNotification:
|
||||
"""Test progress notification in document processors."""
|
||||
|
||||
async def test_progress_callback_called_during_processing(self):
|
||||
"""Test that progress callback is called at intervals during processing."""
|
||||
from nextcloud_mcp_server.document_processors.unstructured import (
|
||||
UnstructuredProcessor,
|
||||
)
|
||||
|
||||
# Mock progress callback to track calls
|
||||
progress_callback = AsyncMock()
|
||||
|
||||
# Create processor with 1-second interval for faster testing
|
||||
processor = UnstructuredProcessor(
|
||||
api_url="http://test:8000",
|
||||
timeout=10,
|
||||
progress_interval=1,
|
||||
)
|
||||
|
||||
# Create a mock event and start time
|
||||
stop_event = anyio.Event()
|
||||
start_time = time.time()
|
||||
|
||||
# Run the poller for 3 seconds, then stop it
|
||||
async def stop_after_delay():
|
||||
await anyio.sleep(3.5)
|
||||
stop_event.set()
|
||||
|
||||
# Run poller and stopper concurrently
|
||||
async with anyio.create_task_group() as tg:
|
||||
tg.start_soon(
|
||||
processor._run_progress_poller,
|
||||
stop_event,
|
||||
progress_callback,
|
||||
start_time,
|
||||
)
|
||||
tg.start_soon(stop_after_delay)
|
||||
|
||||
# Verify progress callback was called at least 3 times (1s, 2s, 3s)
|
||||
assert progress_callback.call_count >= 3
|
||||
|
||||
# Verify each call had correct structure
|
||||
for call in progress_callback.call_args_list:
|
||||
# Calls are made with keyword arguments
|
||||
assert "progress" in call.kwargs
|
||||
assert "total" in call.kwargs
|
||||
assert "message" in call.kwargs
|
||||
|
||||
progress = call.kwargs["progress"]
|
||||
total = call.kwargs["total"]
|
||||
message = call.kwargs["message"]
|
||||
|
||||
assert isinstance(progress, float)
|
||||
assert total is None # Unknown total for unstructured
|
||||
assert "Processing document with unstructured" in message
|
||||
assert "elapsed" in message
|
||||
|
||||
async def test_progress_poller_stops_when_event_set(self):
|
||||
"""Test that progress poller stops immediately when event is set."""
|
||||
from nextcloud_mcp_server.document_processors.unstructured import (
|
||||
UnstructuredProcessor,
|
||||
)
|
||||
|
||||
progress_callback = AsyncMock()
|
||||
processor = UnstructuredProcessor(
|
||||
api_url="http://test:8000",
|
||||
timeout=10,
|
||||
progress_interval=10, # Long interval
|
||||
)
|
||||
|
||||
stop_event = anyio.Event()
|
||||
start_time = time.time()
|
||||
|
||||
# Set event immediately
|
||||
stop_event.set()
|
||||
|
||||
# Run poller
|
||||
await processor._run_progress_poller(stop_event, progress_callback, start_time)
|
||||
|
||||
# Should not call progress callback since event was already set
|
||||
assert progress_callback.call_count == 0
|
||||
|
||||
async def test_progress_callback_exception_handled(self):
|
||||
"""Test that exceptions in progress callback don't crash the poller."""
|
||||
from nextcloud_mcp_server.document_processors.unstructured import (
|
||||
UnstructuredProcessor,
|
||||
)
|
||||
|
||||
# Mock callback that raises exception
|
||||
progress_callback = AsyncMock(side_effect=Exception("Callback error"))
|
||||
|
||||
processor = UnstructuredProcessor(
|
||||
api_url="http://test:8000",
|
||||
timeout=10,
|
||||
progress_interval=1,
|
||||
)
|
||||
|
||||
stop_event = anyio.Event()
|
||||
start_time = time.time()
|
||||
|
||||
# Run poller for 2 seconds
|
||||
async def stop_after_delay():
|
||||
await anyio.sleep(2.5)
|
||||
stop_event.set()
|
||||
|
||||
# Should not raise exception even though callback fails
|
||||
async with anyio.create_task_group() as tg:
|
||||
tg.start_soon(
|
||||
processor._run_progress_poller,
|
||||
stop_event,
|
||||
progress_callback,
|
||||
start_time,
|
||||
)
|
||||
tg.start_soon(stop_after_delay)
|
||||
|
||||
# Callback should have been called (and failed) at least twice
|
||||
assert progress_callback.call_count >= 2
|
||||
|
||||
async def test_process_without_progress_callback(self):
|
||||
"""Test that processing works without progress callback (backward compatibility)."""
|
||||
from nextcloud_mcp_server.document_processors.unstructured import (
|
||||
UnstructuredProcessor,
|
||||
)
|
||||
|
||||
processor = UnstructuredProcessor(
|
||||
api_url="http://test:8000",
|
||||
timeout=10,
|
||||
progress_interval=1,
|
||||
)
|
||||
|
||||
# Mock the _make_api_request method to avoid actual HTTP call
|
||||
from unittest.mock import patch
|
||||
|
||||
from nextcloud_mcp_server.document_processors.base import ProcessingResult
|
||||
|
||||
mock_result = ProcessingResult(
|
||||
text="Test content",
|
||||
metadata={"test": "data"},
|
||||
processor="unstructured",
|
||||
success=True,
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
processor, "_make_api_request", return_value=mock_result
|
||||
) as mock_request:
|
||||
# Call process without progress_callback
|
||||
result = await processor.process(
|
||||
content=b"test", content_type="application/pdf", progress_callback=None
|
||||
)
|
||||
|
||||
# Should call _make_api_request directly
|
||||
assert result == mock_result
|
||||
mock_request.assert_called_once()
|
||||
Vendored
+1
-1
Submodule third_party/oidc updated: e4659c79ef...84f31d302f
Reference in New Issue
Block a user