Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4a3b80cb98 | |||
| fc3ab8d0ac | |||
| 0f03541486 | |||
| ef07b1a6c9 | |||
| 4f82357f24 | |||
| c4293b6750 | |||
| 72e4eb3d19 | |||
| 47dd2df7aa | |||
| 9fd2022151 | |||
| b99dc52c95 | |||
| 78b27fb5e9 | |||
| 03e39a3f94 | |||
| 5259658458 | |||
| e03a3c2e83 | |||
| 94cbd3015d | |||
| 49a961cbcc | |||
| e1aca04aff | |||
| 3b12e585ca | |||
| e647c87dd8 | |||
| cb74157d51 | |||
| 202058bdc8 | |||
| c312911538 | |||
| e602684743 | |||
| 8221046d8a | |||
| 3e45b6ca25 | |||
| 9ec7637579 | |||
| 670188f9e4 | |||
| 3878beaf65 | |||
| a5a0571bde | |||
| 0e7e74867f | |||
| a29045cca4 | |||
| 3c3646bec2 | |||
| dd636e6a08 | |||
| e0de2e17e9 | |||
| 4fc0cb5a41 | |||
| ff9cca716b | |||
| ef4a82e589 | |||
| 301c502e57 | |||
| d4d291d6d2 | |||
| e4b0ea5093 |
@@ -14,16 +14,109 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
- name: Configure Git
|
||||
run: |
|
||||
git config user.name "$GITHUB_ACTOR"
|
||||
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
|
||||
|
||||
- name: Run chart-releaser
|
||||
uses: helm/chart-releaser-action@v1.7.0
|
||||
uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0
|
||||
env:
|
||||
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
|
||||
- name: Update gh-pages with Chart README and Index
|
||||
run: |
|
||||
# Get the repository name
|
||||
REPO_NAME="${GITHUB_REPOSITORY##*/}"
|
||||
REPO_OWNER="${GITHUB_REPOSITORY%/*}"
|
||||
|
||||
# Switch to gh-pages branch
|
||||
git fetch origin gh-pages
|
||||
git checkout gh-pages
|
||||
|
||||
# Copy Chart README to root
|
||||
git checkout ${GITHUB_REF#refs/tags/} -- charts/nextcloud-mcp-server/README.md
|
||||
mv charts/nextcloud-mcp-server/README.md README.md || true
|
||||
rm -rf charts 2>/dev/null || true
|
||||
|
||||
# Create index.html with installation instructions
|
||||
cat > index.html <<'EOF'
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Nextcloud MCP Server Helm Chart</title>
|
||||
<style>
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
||||
max-width: 800px;
|
||||
margin: 50px auto;
|
||||
padding: 20px;
|
||||
line-height: 1.6;
|
||||
}
|
||||
code {
|
||||
background: #f4f4f4;
|
||||
padding: 2px 6px;
|
||||
border-radius: 3px;
|
||||
font-family: "Monaco", "Courier New", monospace;
|
||||
}
|
||||
pre {
|
||||
background: #f4f4f4;
|
||||
padding: 15px;
|
||||
border-radius: 5px;
|
||||
overflow-x: auto;
|
||||
}
|
||||
h1, h2 { color: #0082c9; }
|
||||
a { color: #0082c9; text-decoration: none; }
|
||||
a:hover { text-decoration: underline; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Nextcloud MCP Server Helm Chart</h1>
|
||||
|
||||
<p>A Helm chart for deploying the Nextcloud MCP (Model Context Protocol) Server on Kubernetes, enabling AI assistants to interact with your Nextcloud instance.</p>
|
||||
|
||||
<h2>Installation</h2>
|
||||
|
||||
<p>Add the Helm repository:</p>
|
||||
<pre><code>helm repo add nextcloud-mcp https://REPO_OWNER.github.io/REPO_NAME/
|
||||
helm repo update</code></pre>
|
||||
|
||||
<p>Install the chart:</p>
|
||||
<pre><code>helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \
|
||||
--set nextcloud.host=https://cloud.example.com \
|
||||
--set auth.basic.username=myuser \
|
||||
--set auth.basic.password=mypassword</code></pre>
|
||||
|
||||
<h2>Documentation</h2>
|
||||
|
||||
<ul>
|
||||
<li><a href="README.md">Chart README</a> - Full documentation for the Helm chart</li>
|
||||
<li><a href="https://github.com/REPO_OWNER/REPO_NAME">GitHub Repository</a> - Source code and issues</li>
|
||||
<li><a href="index.yaml">Helm Repository Index</a> - Chart metadata</li>
|
||||
</ul>
|
||||
|
||||
<h2>Quick Start</h2>
|
||||
|
||||
<p>See the <a href="README.md">full documentation</a> for detailed configuration options, examples, and troubleshooting guides.</p>
|
||||
|
||||
<hr>
|
||||
<p><small>Generated by <a href="https://github.com/helm/chart-releaser">chart-releaser</a></small></p>
|
||||
</body>
|
||||
</html>
|
||||
EOF
|
||||
|
||||
# Replace placeholders
|
||||
sed -i "s/REPO_OWNER/$REPO_OWNER/g" index.html
|
||||
sed -i "s/REPO_NAME/$REPO_NAME/g" index.html
|
||||
|
||||
# Commit changes
|
||||
git add README.md index.html
|
||||
git commit -m "Update README and index from chart release" || echo "No changes to commit"
|
||||
git push origin gh-pages
|
||||
|
||||
@@ -20,7 +20,7 @@ jobs:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
- name: Install Python 3.11
|
||||
run: uv python install 3.11
|
||||
- name: Build
|
||||
|
||||
@@ -11,7 +11,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
- name: Check format
|
||||
run: |
|
||||
uv run --frozen ruff format --diff
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
up-flags: "--build"
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
|
||||
uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
|
||||
|
||||
- name: Install Playwright dependencies
|
||||
run: |
|
||||
|
||||
@@ -1,3 +1,53 @@
|
||||
## v0.22.7 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Remove image tag override
|
||||
|
||||
## v0.22.6 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm chart with extraArgs
|
||||
|
||||
## v0.22.5 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update helm chart variables
|
||||
|
||||
## v0.22.4 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## v0.22.3 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## v0.22.2 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## v0.22.1 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- Trigger release
|
||||
|
||||
## v0.22.0 (2025-10-29)
|
||||
|
||||
### Feat
|
||||
|
||||
- **server**: Add /live & /health endpoints
|
||||
- Initialize helm chart
|
||||
|
||||
## v0.21.0 (2025-10-25)
|
||||
|
||||
### Feat
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
FROM ghcr.io/astral-sh/uv:0.9.5-python3.11-alpine@sha256:64ecec379ff82bea84b8a80c0b374f6392bcd54aa52f8c63c12f510f9c0b214d
|
||||
FROM ghcr.io/astral-sh/uv:0.9.6-python3.11-alpine@sha256:b2a366adae7002a23dbba79791baac4e607ee5af5d45039d072d30115c505666
|
||||
|
||||
# Install git (required for caldav dependency from git)
|
||||
RUN apk add --no-cache git
|
||||
|
||||
@@ -72,9 +72,17 @@ uv sync
|
||||
|
||||
# Or using Docker
|
||||
docker pull ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
|
||||
# Or deploy to Kubernetes with Helm
|
||||
helm repo add nextcloud-mcp https://cbcoutinho.github.io/nextcloud-mcp-server
|
||||
helm repo update
|
||||
helm install nextcloud-mcp nextcloud-mcp/nextcloud-mcp-server \
|
||||
--set nextcloud.host=https://cloud.example.com \
|
||||
--set auth.basic.username=myuser \
|
||||
--set auth.basic.password=mypassword
|
||||
```
|
||||
|
||||
See [Installation Guide](docs/installation.md) for detailed instructions.
|
||||
See [Installation Guide](docs/installation.md) for detailed instructions, or [Helm Chart README](charts/nextcloud-mcp-server/README.md) for Kubernetes deployment.
|
||||
|
||||
### 2. Configure
|
||||
|
||||
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: nextcloud-mcp-server
|
||||
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
|
||||
type: application
|
||||
version: 0.1.0
|
||||
appVersion: "0.21.0"
|
||||
version: 0.22.7
|
||||
appVersion: "0.22.7"
|
||||
keywords:
|
||||
- nextcloud
|
||||
- mcp
|
||||
|
||||
@@ -114,14 +114,35 @@ ingress:
|
||||
| `auth.oauth.persistence.enabled` | Enable persistent storage for OAuth | `true` |
|
||||
| `auth.oauth.persistence.size` | Size of OAuth storage PVC | `100Mi` |
|
||||
|
||||
#### MCP Server Configuration
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `mcp.transport` | Transport mode | `streamable-http` |
|
||||
| `mcp.port` | Server port (used by both auth modes) | `8000` |
|
||||
| `mcp.extraArgs` | Additional command-line arguments | `[]` |
|
||||
|
||||
The `extraArgs` parameter allows you to pass additional command-line arguments to the MCP server. This is useful for enabling debug logging, enabling specific apps, or other runtime configuration.
|
||||
|
||||
**Example:**
|
||||
```yaml
|
||||
mcp:
|
||||
extraArgs:
|
||||
- "--log-level"
|
||||
- "debug"
|
||||
- "--enable-app"
|
||||
- "notes"
|
||||
```
|
||||
|
||||
#### Image Configuration
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `image.repository` | Container image repository | `ghcr.io/cbcoutinho/nextcloud-mcp-server` |
|
||||
| `image.tag` | Container image tag | `""` (uses chart appVersion) |
|
||||
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
|
||||
|
||||
**Note:** Image tag is automatically set to the chart's `appVersion` and cannot be overridden.
|
||||
|
||||
#### Resources
|
||||
|
||||
| Parameter | Description | Default |
|
||||
@@ -137,7 +158,6 @@ ingress:
|
||||
|-----------|-------------|---------|
|
||||
| `service.type` | Service type | `ClusterIP` |
|
||||
| `service.port` | Service port | `8000` |
|
||||
| `service.oauthPort` | OAuth service port | `8001` |
|
||||
|
||||
#### Ingress
|
||||
|
||||
@@ -371,7 +391,6 @@ helm upgrade nextcloud-mcp ./helm/nextcloud-mcp-server -f custom-values.yaml
|
||||
|
||||
```bash
|
||||
helm upgrade nextcloud-mcp ./helm/nextcloud-mcp-server \
|
||||
--set image.tag=0.21.0 \
|
||||
--set resources.limits.memory=1Gi
|
||||
```
|
||||
|
||||
|
||||
@@ -95,21 +95,17 @@ Create the name of the PVC to use for OAuth storage
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Return the appropriate MCP server port based on auth mode
|
||||
Return the MCP server port
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.port" -}}
|
||||
{{- if eq .Values.auth.mode "oauth" }}
|
||||
{{- .Values.auth.oauth.port }}
|
||||
{{- else }}
|
||||
{{- .Values.mcp.port }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Return the image tag
|
||||
Return the image tag (always uses chart appVersion)
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.imageTag" -}}
|
||||
{{- .Values.image.tag | default .Chart.AppVersion }}
|
||||
{{- .Chart.AppVersion }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
|
||||
@@ -46,11 +46,12 @@ spec:
|
||||
- "{{ .Values.mcp.transport }}"
|
||||
{{- if eq .Values.auth.mode "oauth" }}
|
||||
- "--oauth"
|
||||
- "--port"
|
||||
- "{{ .Values.auth.oauth.port }}"
|
||||
- "--oauth-token-type"
|
||||
- "{{ .Values.auth.oauth.tokenType }}"
|
||||
{{- end }}
|
||||
{{- with .Values.mcp.extraArgs }}
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: {{ include "nextcloud-mcp-server.port" . }}
|
||||
|
||||
@@ -8,8 +8,7 @@ replicaCount: 1
|
||||
image:
|
||||
repository: ghcr.io/cbcoutinho/nextcloud-mcp-server
|
||||
pullPolicy: IfNotPresent
|
||||
# Overrides the image tag whose default is the chart appVersion.
|
||||
tag: ""
|
||||
# Image tag is automatically set to chart appVersion
|
||||
|
||||
imagePullSecrets: []
|
||||
nameOverride: ""
|
||||
@@ -61,8 +60,6 @@ auth:
|
||||
|
||||
# OAuth2/OIDC settings (experimental)
|
||||
oauth:
|
||||
# Port for OAuth MCP server (default: 8001)
|
||||
port: 8001
|
||||
# OAuth token type: "jwt" or "opaque"
|
||||
tokenType: "jwt"
|
||||
# Pre-registered OAuth client ID (optional, ignored if existingSecret is set)
|
||||
@@ -97,8 +94,11 @@ auth:
|
||||
mcp:
|
||||
# Transport mode (default: streamable-http for SSE)
|
||||
transport: "streamable-http"
|
||||
# Port for basic auth mode
|
||||
# Port for MCP server (both basic auth and OAuth modes)
|
||||
port: 8000
|
||||
# Additional command-line arguments to pass to nextcloud-mcp-server
|
||||
# Example: ["--log-level", "debug", "--enable-app", "notes"]
|
||||
extraArgs: []
|
||||
|
||||
# Document processing configuration (optional)
|
||||
documentProcessing:
|
||||
@@ -171,8 +171,6 @@ securityContext:
|
||||
service:
|
||||
type: ClusterIP
|
||||
port: 8000
|
||||
# For OAuth mode, you may want to expose both ports
|
||||
oauthPort: 8001
|
||||
annotations: {}
|
||||
|
||||
ingress:
|
||||
|
||||
+2
-2
@@ -21,7 +21,7 @@ services:
|
||||
restart: always
|
||||
|
||||
app:
|
||||
image: docker.io/library/nextcloud:32.0.1@sha256:42a36b4711191273a9cf8cebfd35602909eb1bee461b7076d4d5a57f7ec2b81e
|
||||
image: docker.io/library/nextcloud:32.0.1@sha256:1e4eae55eebe094cae6f9e7b6e0b4bccf4a4fe7b7e6f6f8f57010994b3b2ee42
|
||||
restart: always
|
||||
ports:
|
||||
- 0.0.0.0:8080:80
|
||||
@@ -45,7 +45,7 @@ services:
|
||||
- REDIS_HOST=redis
|
||||
|
||||
recipes:
|
||||
image: docker.io/library/nginx:alpine@sha256:61e01287e546aac28a3f56839c136b31f590273f3b41187a36f46f6a03bbfe22
|
||||
image: docker.io/library/nginx:alpine@sha256:b3c656d55d7ad751196f21b7fd2e8d4da9cb430e32f646adcf92441b72f82b14
|
||||
restart: always
|
||||
volumes:
|
||||
- ./tests/fixtures/test_recipe.html:/usr/share/nginx/html/test_recipe.html:ro
|
||||
|
||||
@@ -0,0 +1,795 @@
|
||||
# ADR-002: Vector Database Background Sync Authentication
|
||||
|
||||
## Status
|
||||
Proposed
|
||||
|
||||
## Context
|
||||
|
||||
To enable semantic search capabilities, the MCP server needs to index user content (notes, files, calendar events) into a vector database. This requires a background sync worker that:
|
||||
|
||||
1. **Runs independently** of user requests (periodic or continuous operation)
|
||||
2. **Accesses multiple users' content** to build a comprehensive search index
|
||||
3. **Respects user permissions** - only index content users have access to
|
||||
4. **Operates in OAuth mode** - where the MCP server doesn't have traditional admin credentials
|
||||
|
||||
### Current OAuth Architecture
|
||||
|
||||
The MCP server currently operates in two authentication modes:
|
||||
|
||||
1. **BasicAuth Mode**: Uses username/password credentials (typically admin account)
|
||||
2. **OAuth Mode**: Single OAuth client, multiple user tokens
|
||||
- Users authenticate via OAuth flow
|
||||
- Each request includes user's access token
|
||||
- Server creates per-request `NextcloudClient` with user's bearer token
|
||||
- No tokens are stored server-side
|
||||
|
||||
### The Challenge
|
||||
|
||||
Background workers need long-lived authentication to:
|
||||
- Index content continuously/periodically
|
||||
- Process multiple users' data in batch operations
|
||||
- Operate when users are not actively making requests
|
||||
|
||||
However, in OAuth mode:
|
||||
- User access tokens are ephemeral (exist only during request)
|
||||
- MCP server doesn't store user credentials
|
||||
- Admin credentials defeat the purpose of OAuth
|
||||
|
||||
We need an OAuth-native solution that maintains security while enabling background operations.
|
||||
|
||||
## Decision
|
||||
|
||||
We will implement a **tiered authentication strategy** that leverages OAuth standards with graceful fallback:
|
||||
|
||||
### Primary Strategy: OAuth-Based Authentication
|
||||
|
||||
**Tier 1: Offline Access with Refresh Tokens** (Preferred)
|
||||
- Request `offline_access` scope during OAuth client registration
|
||||
- Receive and securely store user refresh tokens
|
||||
- Background worker exchanges refresh tokens for access tokens as needed
|
||||
- Respects per-user permissions and provides full audit trail
|
||||
|
||||
**Tier 2: Token Exchange (RFC 8693)** (If supported)
|
||||
- Service account exchanges its token for user-scoped tokens on-demand
|
||||
- No token storage required
|
||||
- Only available if OIDC provider implements RFC 8693
|
||||
|
||||
### Fallback Strategy: Admin Credentials
|
||||
|
||||
**Tier 3: Admin BasicAuth** (Development/Simple Deployments)
|
||||
- Dedicated sync account with read-only permissions
|
||||
- Clear documentation of security implications
|
||||
- Recommended only for trusted environments
|
||||
|
||||
### Key Architectural Principles
|
||||
|
||||
1. **Capability Detection**: Automatically detect which OAuth methods are supported
|
||||
2. **Dual-Phase Authorization**:
|
||||
- Sync worker indexes with service credentials
|
||||
- User requests verify access with user's OAuth token
|
||||
3. **Defense in Depth**: The vector database is a search accelerator, not a security boundary
|
||||
4. **Separation of Concerns**: Sync credentials ≠ Request credentials
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### 1. Offline Access Flow (Tier 1)
|
||||
|
||||
#### 1.1 Client Registration
|
||||
```python
|
||||
# During OAuth client registration
|
||||
client_metadata = {
|
||||
"client_name": "Nextcloud MCP Server",
|
||||
"redirect_uris": ["http://localhost:8000/oauth/callback"],
|
||||
"grant_types": ["authorization_code", "refresh_token"],
|
||||
"scope": "openid profile email offline_access notes:read files:read ...",
|
||||
"token_type": "Bearer" # or "jwt"
|
||||
}
|
||||
```
|
||||
|
||||
#### 1.2 Token Storage
|
||||
```python
|
||||
# Encrypted token storage
|
||||
class RefreshTokenStorage:
    """Encrypted persistence for per-user OAuth refresh tokens.

    Tokens are encrypted with Fernet before they are written to the
    ``refresh_tokens`` table and decrypted again on read.
    """

    def __init__(self, db_path: str, encryption_key: bytes):
        # `Database` and `Fernet` are provided by the surrounding module.
        self.db = Database(db_path)
        self.cipher = Fernet(encryption_key)

    async def store_refresh_token(
        self,
        user_id: str,
        refresh_token: str,
        expires_at: int | None = None
    ):
        """Encrypt ``refresh_token`` and insert or replace the row for ``user_id``.

        The row also records ``expires_at`` as given and the current Unix
        time as the moment of storage.
        """
        ciphertext = self.cipher.encrypt(refresh_token.encode())
        stored_at = int(time.time())
        row_values = (user_id, ciphertext, expires_at, stored_at)
        await self.db.execute(
            "INSERT OR REPLACE INTO refresh_tokens VALUES (?, ?, ?, ?)",
            row_values
        )

    async def get_refresh_token(self, user_id: str) -> str | None:
        """Return the decrypted refresh token for ``user_id``, or ``None`` if no row exists."""
        record = await self.db.fetch_one(
            "SELECT encrypted_token FROM refresh_tokens WHERE user_id = ?",
            (user_id,)
        )
        if not record:
            return None
        return self.cipher.decrypt(record[0]).decode()
|
||||
```
|
||||
|
||||
#### 1.3 Token Refresh Flow
|
||||
```python
|
||||
async def get_user_access_token(user_id: str) -> str:
    """Exchange the stored refresh token of ``user_id`` for a fresh access token.

    Raises:
        ValueError: if no refresh token is stored for the user.
        httpx.HTTPStatusError: if the token endpoint answers with an error status.
    """
    # Function-scope import so this snippet does not depend on a file-level one.
    import time

    # Retrieve stored refresh token
    refresh_token = await token_storage.get_refresh_token(user_id)
    if not refresh_token:
        raise ValueError(f"No refresh token for user {user_id}")

    # Exchange for access token
    async with httpx.AsyncClient() as client:
        response = await client.post(
            token_endpoint,
            data={
                "grant_type": "refresh_token",
                "refresh_token": refresh_token
            },
            auth=(client_id, client_secret)
        )
        response.raise_for_status()
        token_data = response.json()

    # Store new refresh token if rotated
    if "refresh_token" in token_data:
        # "refresh_expires_in" is a lifetime in seconds, while the storage
        # parameter is named `expires_at`; convert it to an absolute Unix
        # timestamp instead of storing the raw duration.
        # NOTE(review): "refresh_expires_in" is not a standard OAuth field;
        # confirm the provider actually returns it.
        refresh_expires_in = token_data.get("refresh_expires_in")
        expires_at = (
            int(time.time()) + int(refresh_expires_in)
            if refresh_expires_in is not None
            else None
        )
        await token_storage.store_refresh_token(
            user_id,
            token_data["refresh_token"],
            expires_at
        )

    return token_data["access_token"]
|
||||
```
|
||||
|
||||
#### 1.4 Capturing Refresh Tokens
|
||||
|
||||
**Challenge**: The MCP protocol doesn't expose refresh tokens to the server
|
||||
|
||||
**Solution**: Intercept OAuth callback
|
||||
```python
|
||||
# Add route to MCP server
|
||||
@app.route("/oauth/callback")
async def oauth_callback(request):
    """Capture OAuth callback and store refresh token.

    Exchanges the authorization code for tokens, resolves the user from the
    userinfo response, persists the refresh token when one was issued, and
    then hands control back to the MCP OAuth flow.
    """
    # Function-scope import so this snippet does not depend on a file-level one.
    import time

    code = request.query_params.get("code")
    # NOTE(review): `state` is forwarded but not validated in this snippet;
    # confirm that CSRF/state verification happens elsewhere.
    state = request.query_params.get("state")

    # Exchange authorization code for tokens
    token_response = await exchange_authorization_code(code)

    # Extract user info
    userinfo = await get_userinfo(token_response["access_token"])
    user_id = userinfo["sub"]

    # Store refresh token (if present)
    if "refresh_token" in token_response:
        # "refresh_expires_in" is a lifetime in seconds; `expires_at` is an
        # absolute time, so convert before storing.
        refresh_expires_in = token_response.get("refresh_expires_in")
        expires_at = (
            int(time.time()) + int(refresh_expires_in)
            if refresh_expires_in is not None
            else None
        )
        await token_storage.store_refresh_token(
            user_id,
            token_response["refresh_token"],
            expires_at=expires_at
        )
        logger.info(f"Stored refresh token for user: {user_id}")

    # Continue MCP OAuth flow
    return redirect_to_mcp_client(state, token_response)
|
||||
```
|
||||
|
||||
### 2. Token Exchange Flow (Tier 2)
|
||||
|
||||
#### 2.1 Capability Detection
|
||||
```python
|
||||
async def check_token_exchange_support(discovery_url: str) -> bool:
    """Check if OIDC provider supports RFC 8693 token exchange.

    Returns True when the discovery document lists the token-exchange grant
    type under ``grant_types_supported``.

    Raises:
        httpx.HTTPStatusError: if the discovery endpoint answers with an
            error status.
    """
    async with httpx.AsyncClient() as client:
        response = await client.get(discovery_url)
        # Fail loudly on 4xx/5xx instead of trying to parse an error page as
        # the discovery document.
        response.raise_for_status()
        discovery = response.json()

    # Check for token exchange grant type
    grant_types = discovery.get("grant_types_supported", [])
    return "urn:ietf:params:oauth:grant-type:token-exchange" in grant_types
|
||||
```
|
||||
|
||||
#### 2.2 Token Exchange Implementation
|
||||
```python
|
||||
async def exchange_for_user_token(
    service_token: str,
    user_id: str,
    scopes: list[str]
) -> str:
    """Trade the service account token for an access token scoped to ``user_id``.

    Posts an RFC 8693 token-exchange request to the token endpoint and returns
    the ``access_token`` of the answer.

    Raises:
        TokenExchangeNotSupportedError: for any response status other than 200.
    """
    async with httpx.AsyncClient() as http:
        reply = await http.post(
            token_endpoint,
            data={
                "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
                "subject_token": service_token,
                "subject_token_type": "urn:ietf:params:oauth:token-type:access_token",
                "requested_token_type": "urn:ietf:params:oauth:token-type:access_token",
                "resource": f"user:{user_id}",
                "scope": " ".join(scopes)
            },
            auth=(client_id, client_secret)
        )

    exchange_succeeded = reply.status_code == 200
    if not exchange_succeeded:
        logger.warning(f"Token exchange failed: {reply.status_code}")
        raise TokenExchangeNotSupportedError()

    payload = reply.json()
    return payload["access_token"]
|
||||
```
|
||||
|
||||
#### 2.3 Service Account Token
|
||||
```python
|
||||
async def get_service_token() -> str:
    """Fetch an access token for the MCP server's own service account.

    Uses the client-credentials grant with the read scopes for notes, files
    and calendar; an error status from the token endpoint raises.
    """
    grant_request = {
        "grant_type": "client_credentials",
        "scope": "notes:read files:read calendar:read"
    }

    async with httpx.AsyncClient() as http:
        reply = await http.post(
            token_endpoint,
            data=grant_request,
            auth=(client_id, client_secret)
        )
        reply.raise_for_status()
        payload = reply.json()
        return payload["access_token"]
|
||||
```
|
||||
|
||||
### 3. Sync Worker with Tiered Authentication
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/sync_worker.py
|
||||
class VectorSyncWorker:
    """Background worker for indexing content into vector database.

    `initialize()` picks one authentication method (offline access, token
    exchange, or admin BasicAuth, in that order) and `run()` then loops
    forever, syncing every known user and sleeping between passes.
    """

    def __init__(self):
        self.auth_method = None     # "offline_access", "token_exchange" or "admin_basic"
        self.token_storage = None   # only set when offline access is selected
        self.vector_service = None  # NOTE(review): never assigned in this snippet; must be injected before run()

    async def initialize(self):
        """Detect and configure authentication method.

        Raises:
            RuntimeError: if none of the three tiers can be configured.
        """

        # Try Tier 1: Offline Access
        if os.getenv("ENABLE_OFFLINE_ACCESS") == "true":
            try:
                encryption_key = os.getenv("TOKEN_ENCRYPTION_KEY")
                if not encryption_key:
                    raise ValueError("TOKEN_ENCRYPTION_KEY is not set")
                self.token_storage = RefreshTokenStorage(
                    # Honour the documented TOKEN_STORAGE_DB setting; the
                    # previous hard-coded path remains the default.
                    db_path=os.getenv("TOKEN_STORAGE_DB", "tokens.db"),
                    # Fernet takes the URL-safe base64 *encoded* key as
                    # produced by Fernet.generate_key(); decoding it first
                    # yields raw bytes that Fernet rejects.
                    encryption_key=encryption_key.encode()
                )
                self.auth_method = "offline_access"
                logger.info("✓ Using offline_access authentication")
                return
            except Exception as e:
                logger.warning(f"Offline access unavailable: {e}")

        # Try Tier 2: Token Exchange
        try:
            if await check_token_exchange_support(discovery_url):
                self.auth_method = "token_exchange"
                logger.info("✓ Using token exchange authentication (RFC 8693)")
                return
        except Exception as e:
            logger.warning(f"Token exchange unavailable: {e}")

        # Fallback: Admin Credentials
        if os.getenv("NEXTCLOUD_USERNAME") and os.getenv("NEXTCLOUD_PASSWORD"):
            self.auth_method = "admin_basic"
            logger.warning(
                "⚠ Using admin BasicAuth authentication. "
                "Consider enabling offline_access for production."
            )
            return

        raise RuntimeError("No authentication method available for sync worker")

    async def get_user_client(self, user_id: str) -> NextcloudClient:
        """Get authenticated client for user based on auth method.

        Raises:
            RuntimeError: if `auth_method` is not one of the known values.
        """

        if self.auth_method == "offline_access":
            # Exchange refresh token for access token
            access_token = await get_user_access_token(user_id)
            return NextcloudClient.from_token(
                base_url=nextcloud_host,
                token=access_token,
                username=user_id
            )

        elif self.auth_method == "token_exchange":
            # Get service token and exchange for user token
            service_token = await get_service_token()
            user_token = await exchange_for_user_token(
                service_token,
                user_id,
                scopes=["notes:read", "files:read"]
            )
            return NextcloudClient.from_token(
                base_url=nextcloud_host,
                token=user_token,
                username=user_id
            )

        elif self.auth_method == "admin_basic":
            # Use admin credentials (fallback)
            # NOTE(review): this client is not scoped to `user_id`; confirm
            # that notes listed through it really belong to that user,
            # otherwise sync_user_content tags the wrong owner.
            return NextcloudClient.from_env()

        raise RuntimeError(f"Unknown auth method: {self.auth_method}")

    async def sync_user_content(self, user_id: str):
        """Index a user's content into vector database.

        Failures are logged and swallowed so that one user's error does not
        stop the sync of the remaining users.
        """

        try:
            # Get authenticated client for this user
            client = await self.get_user_client(user_id)

            # Sync notes
            notes = await client.notes.list_notes()
            for note in notes:
                embedding = await self.vector_service.embed(note.content)
                await self.vector_service.upsert(
                    collection="nextcloud_content",
                    id=f"note_{note.id}",
                    vector=embedding,
                    metadata={
                        "user_id": user_id,
                        "content_type": "note",
                        "note_id": note.id,
                        "title": note.title,
                        "category": note.category
                    }
                )

            logger.info(f"Synced {len(notes)} notes for user: {user_id}")

        except Exception as e:
            logger.error(f"Failed to sync user {user_id}: {e}")

    async def run(self):
        """Main sync loop.

        Sleeps SYNC_INTERVAL_SECONDS (default 300) after a successful pass
        and 60 seconds after a failed one.
        """

        await self.initialize()

        # Read the documented interval once; 300 keeps the previous behavior.
        sync_interval = int(os.getenv("SYNC_INTERVAL_SECONDS", "300"))

        while True:
            try:
                # Get list of users to sync
                if self.auth_method == "admin_basic":
                    # Admin can list all users
                    admin_client = NextcloudClient.from_env()
                    users = await admin_client.users.list_users()
                    user_ids = [u.id for u in users]
                elif self.token_storage is not None:
                    # OAuth methods: only sync users with stored tokens
                    user_ids = await self.token_storage.get_all_user_ids()
                else:
                    # Token-exchange mode never creates a token storage, so
                    # there is no user list to read; report that explicitly
                    # instead of failing with an AttributeError on None.
                    raise RuntimeError(
                        f"No user source available for auth method: {self.auth_method}"
                    )

                logger.info(f"Syncing content for {len(user_ids)} users")

                for user_id in user_ids:
                    await self.sync_user_content(user_id)

                logger.info("Sync complete, sleeping...")
                await asyncio.sleep(sync_interval)

            except Exception as e:
                logger.error(f"Sync failed: {e}")
                await asyncio.sleep(60)  # Retry after 1 minute
|
||||
```
|
||||
|
||||
### 4. User Request Verification (Dual-Phase Authorization)
|
||||
|
||||
```python
|
||||
@mcp.tool()
@require_scopes("notes:read")
async def nc_notes_semantic_search(
    query: str,
    ctx: Context,
    limit: int = 10
) -> SemanticSearchResponse:
    """Semantic search with permission verification.

    Phase 1 queries the vector index for candidates owned by or shared with
    the requesting user. Phase 2 re-fetches every candidate through the
    user's own client and keeps only the notes that fetch succeeds for, up to
    ``limit`` results.

    Raises:
        HTTPStatusError: for any fetch error other than 403 or 404.
    """

    # Get user's OAuth client (uses their access token from request)
    user_client = get_client(ctx)
    username = user_client.username

    # Phase 1: Vector search (fast, may include false positives)
    embedding = await vector_service.embed(query)
    candidate_results = await qdrant.search(
        collection_name="nextcloud_content",
        query_vector=embedding,
        query_filter={
            "must": [
                {
                    "should": [
                        {"key": "user_id", "match": {"value": username}},
                        {"key": "shared_with", "match": {"any": [username]}}
                    ]
                },
                {"key": "content_type", "match": {"value": "note"}}
            ]
        },
        limit=limit * 2  # Get extra candidates
    )

    # Phase 2: Verify access via Nextcloud API (authoritative)
    verified_results = []
    for candidate in candidate_results:
        note_id = candidate.payload["note_id"]
        try:
            # This uses user's OAuth token - will fail if no access
            note = await user_client.notes.get_note(note_id)
            verified_results.append({
                "note": note,
                "score": candidate.score
            })
            if len(verified_results) >= limit:
                break
        except HTTPStatusError as e:
            # 403: the user has no access. 404: the note no longer exists,
            # i.e. the index entry is stale. Either way the candidate is
            # dropped rather than failing the whole search.
            if e.response.status_code in (403, 404):
                logger.debug(f"Filtered out note {note_id} for {username}")
                continue
            raise

    return SemanticSearchResponse(results=verified_results)
|
||||
```
|
||||
|
||||
### 5. Security Implementation
|
||||
|
||||
#### 5.1 Token Encryption
|
||||
```python
|
||||
# Generate encryption key (store securely)
|
||||
from cryptography.fernet import Fernet
|
||||
|
||||
# On first setup
|
||||
encryption_key = Fernet.generate_key()
|
||||
# Store in environment or secrets manager
|
||||
# NEVER commit to source control
|
||||
|
||||
# In production
|
||||
encryption_key = os.getenv("TOKEN_ENCRYPTION_KEY") # Base64-encoded Fernet key
|
||||
```
|
||||
|
||||
#### 5.2 Token Rotation
|
||||
```python
|
||||
async def rotate_refresh_token(user_id: str):
    """Handle refresh token rotation.

    Exchanges the stored refresh token and, when the response carries a new
    refresh token, stores it and removes the old one.

    Raises:
        ValueError: if no refresh token is stored for the user.
    """
    # Function-scope import so this snippet does not depend on a file-level one.
    import time

    old_refresh_token = await token_storage.get_refresh_token(user_id)
    if not old_refresh_token:
        # Nothing to rotate; do not send None to the token endpoint.
        raise ValueError(f"No refresh token for user {user_id}")

    # Exchange for new tokens
    response = await exchange_refresh_token(old_refresh_token)

    if "refresh_token" in response:
        # "refresh_expires_in" is a lifetime in seconds; `expires_at` is an
        # absolute time, so convert before storing.
        refresh_expires_in = response.get("refresh_expires_in")
        expires_at = (
            int(time.time()) + int(refresh_expires_in)
            if refresh_expires_in is not None
            else None
        )

        # Store new refresh token
        await token_storage.store_refresh_token(
            user_id,
            response["refresh_token"],
            expires_at=expires_at
        )

        # Securely delete old token (only once it has been superseded)
        # NOTE(review): delete_refresh_token is not shown here; confirm it
        # matches on the old token value and cannot remove the row that was
        # just written for this user.
        await token_storage.delete_refresh_token(user_id, old_refresh_token)
|
||||
```
|
||||
|
||||
#### 5.3 Audit Logging
|
||||
```python
|
||||
async def audit_log(
|
||||
event: str,
|
||||
user_id: str,
|
||||
resource_type: str,
|
||||
resource_id: str,
|
||||
auth_method: str
|
||||
):
|
||||
"""Log sync operations for audit trail"""
|
||||
|
||||
await audit_db.execute(
|
||||
"INSERT INTO audit_logs VALUES (?, ?, ?, ?, ?, ?, ?)",
|
||||
(
|
||||
int(time.time()),
|
||||
event, # "index_note", "index_file"
|
||||
user_id,
|
||||
resource_type,
|
||||
resource_id,
|
||||
auth_method,
|
||||
socket.gethostname()
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
### 6. Configuration
|
||||
|
||||
#### 6.1 Environment Variables
|
||||
```bash
|
||||
# Tier 1: Offline Access
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
TOKEN_ENCRYPTION_KEY=<base64-encoded-fernet-key>
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
# Tier 2: Token Exchange (auto-detected)
|
||||
# No configuration needed - detected via OIDC discovery
|
||||
|
||||
# Tier 3: Admin Fallback
|
||||
NEXTCLOUD_USERNAME=sync-bot
|
||||
NEXTCLOUD_PASSWORD=<secure-password>
|
||||
|
||||
# Vector Database
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
QDRANT_API_KEY=<api-key>
|
||||
|
||||
# Sync Configuration
|
||||
SYNC_INTERVAL_SECONDS=300
|
||||
SYNC_BATCH_SIZE=100
|
||||
```
|
||||
|
||||
#### 6.2 Docker Compose
|
||||
```yaml
|
||||
services:
|
||||
mcp-sync:
|
||||
build: .
|
||||
command: ["python", "-m", "nextcloud_mcp_server.sync_worker"]
|
||||
environment:
|
||||
- NEXTCLOUD_HOST=http://app:80
|
||||
- ENABLE_OFFLINE_ACCESS=true
|
||||
- TOKEN_ENCRYPTION_KEY=${TOKEN_ENCRYPTION_KEY}
|
||||
- QDRANT_URL=http://qdrant:6333
|
||||
# OAuth client credentials (for token refresh)
|
||||
- NEXTCLOUD_OIDC_CLIENT_ID=${NEXTCLOUD_OIDC_CLIENT_ID}
|
||||
- NEXTCLOUD_OIDC_CLIENT_SECRET=${NEXTCLOUD_OIDC_CLIENT_SECRET}
|
||||
volumes:
|
||||
- sync-tokens:/app/data
|
||||
depends_on:
|
||||
- app
|
||||
- qdrant
|
||||
|
||||
volumes:
|
||||
sync-tokens: # Persistent storage for encrypted tokens
|
||||
```
|
||||
|
||||
## Consequences
|
||||
|
||||
### Benefits
|
||||
|
||||
1. **OAuth-Native Authentication**
|
||||
- Leverages standard OAuth flows (offline_access, token exchange)
|
||||
- No reliance on admin passwords in production
|
||||
- Compatible with enterprise OIDC providers
|
||||
|
||||
2. **User-Level Permissions**
|
||||
- Each user's content indexed with their own credentials
|
||||
- Respects sharing, permissions, and access controls
|
||||
- Full audit trail of which user's token was used
|
||||
|
||||
3. **Security**
|
||||
- Tokens encrypted at rest
|
||||
- Short-lived access tokens (refreshed as needed)
|
||||
- Token rotation support
|
||||
- Defense in depth with dual-phase authorization
|
||||
|
||||
4. **Flexibility**
|
||||
- Automatic capability detection
|
||||
- Graceful degradation through authentication tiers
|
||||
- Works with varying OIDC provider capabilities
|
||||
|
||||
5. **Operational**
|
||||
- Background sync independent of user activity
|
||||
- Efficient batch processing
|
||||
- Clear separation of sync vs request credentials
|
||||
|
||||
### Limitations
|
||||
|
||||
1. **Complexity**
|
||||
- Multiple authentication paths to maintain
|
||||
- Token storage and encryption infrastructure
|
||||
- More moving parts than simple admin auth
|
||||
|
||||
2. **User Experience**
|
||||
- `offline_access` scope may require additional consent
|
||||
- Users must authenticate at least once for indexing
|
||||
- New users not automatically indexed
|
||||
|
||||
3. **OIDC Provider Dependency**
|
||||
- Token exchange requires RFC 8693 support (rare)
|
||||
- Refresh token rotation varies by provider
|
||||
- Some providers may not support offline_access
|
||||
|
||||
4. **Operational Overhead**
|
||||
- Token database maintenance
|
||||
- Monitoring token expiration
|
||||
- Handling revoked tokens gracefully
|
||||
|
||||
### Security Considerations
|
||||
|
||||
#### Threat Model
|
||||
|
||||
**Threat 1: Token Storage Breach**
|
||||
- **Mitigation**: Encryption at rest using Fernet
|
||||
- **Mitigation**: Secure key management (secrets manager)
|
||||
- **Mitigation**: Minimal token lifetime
|
||||
- **Detection**: Audit logs for unusual access patterns
|
||||
|
||||
**Threat 2: Token Replay**
|
||||
- **Mitigation**: Short-lived access tokens (refreshed frequently)
|
||||
- **Mitigation**: Token rotation on each refresh
|
||||
- **Mitigation**: Revocation support
|
||||
|
||||
**Threat 3: Privilege Escalation**
|
||||
- **Mitigation**: Dual-phase authorization (vector DB + Nextcloud API)
|
||||
- **Mitigation**: Sync worker uses same scopes as user requests
|
||||
- **Mitigation**: Per-user token isolation
|
||||
|
||||
**Threat 4: Vector Database Poisoning**
|
||||
- **Mitigation**: User requests always verify via Nextcloud API
|
||||
- **Mitigation**: Vector DB is cache/accelerator, not source of truth
|
||||
- **Mitigation**: Sync operations audited per user
|
||||
|
||||
#### Security Best Practices
|
||||
|
||||
1. **Token Encryption Key Management**
|
||||
```bash
|
||||
# Generate secure key
|
||||
python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
|
||||
# Store in secrets manager (Vault, AWS Secrets Manager, etc.)
|
||||
# Or use environment variable with restricted permissions
|
||||
```
|
||||
|
||||
2. **Token Storage Permissions**
|
||||
```bash
|
||||
# Restrict database file permissions
|
||||
chmod 600 /app/data/tokens.db
|
||||
chown mcp-server:mcp-server /app/data/tokens.db
|
||||
```
|
||||
|
||||
3. **Token Rotation Schedule**
|
||||
- Refresh access tokens every 5 minutes (or token expiry)
|
||||
- Rotate refresh tokens on each use (if provider supports)
|
||||
- Revoke tokens on user logout/deauthorization
|
||||
|
||||
4. **Monitoring and Alerting**
|
||||
- Alert on token refresh failures
|
||||
- Monitor for unusual access patterns
|
||||
- Track token age and rotation
|
||||
- Audit sync operations per user
|
||||
|
||||
### Future Enhancements
|
||||
|
||||
1. **Token Revocation Handling**
|
||||
- Webhook endpoint for token revocation events
|
||||
- Periodic validation of stored tokens
|
||||
- Graceful handling of revoked tokens
|
||||
|
||||
2. **Selective Sync**
|
||||
- Allow users to opt-in/opt-out of indexing
|
||||
- Per-content-type sync preferences
|
||||
- Privacy controls for sensitive content
|
||||
|
||||
3. **Multi-Tenant Token Storage**
|
||||
- Separate token databases per tenant
|
||||
- Key rotation per tenant
|
||||
- Tenant isolation
|
||||
|
||||
4. **Token Lifecycle Management**
|
||||
- Automatic cleanup of expired tokens
|
||||
- Token usage analytics
|
||||
- Token health dashboard
|
||||
|
||||
5. **Alternative OAuth Flows**
|
||||
- Device flow for headless sync
|
||||
- Resource owner password credentials (ROPC) as fallback
|
||||
- SAML assertion grants
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### Alternative 1: Admin BasicAuth Only
|
||||
|
||||
**Approach**: Background worker always uses admin credentials
|
||||
|
||||
**Pros**:
|
||||
- Simple implementation
|
||||
- No token storage complexity
|
||||
- Works with any authentication backend
|
||||
|
||||
**Cons**:
|
||||
- Violates principle of least privilege
|
||||
- Single powerful credential
|
||||
- No per-user audit trail
|
||||
- Bypasses OAuth entirely
|
||||
|
||||
**Decision**: Rejected for production use; kept as fallback only
|
||||
|
||||
### Alternative 2: Client Credentials Grant Only
|
||||
|
||||
**Approach**: Service account with broad read permissions
|
||||
|
||||
**Pros**:
|
||||
- OAuth-native pattern
|
||||
- No user token storage
|
||||
- Standard OAuth flow
|
||||
|
||||
**Cons**:
|
||||
- Requires client_credentials support (may not be available)
|
||||
- Still needs broad cross-user permissions
|
||||
- Not well-suited for multi-user indexing
|
||||
|
||||
**Decision**: Rejected; token exchange is better fit for multi-user scenario
|
||||
|
||||
### Alternative 3: Per-User Access Token Storage
|
||||
|
||||
**Approach**: Store user access tokens (not refresh tokens)
|
||||
|
||||
**Pros**:
|
||||
- Simpler than refresh token flow
|
||||
- No token refresh logic needed
|
||||
|
||||
**Cons**:
|
||||
- Access tokens are short-lived (1-24 hours)
|
||||
- Requires frequent re-authentication
|
||||
- Poor user experience
|
||||
- Sync gaps when tokens expire
|
||||
|
||||
**Decision**: Rejected; refresh tokens provide better UX
|
||||
|
||||
### Alternative 4: On-Demand Indexing Only
|
||||
|
||||
**Approach**: Index content when user searches (no background worker)
|
||||
|
||||
**Pros**:
|
||||
- Uses user's request token
|
||||
- No background auth needed
|
||||
- Simpler architecture
|
||||
|
||||
**Cons**:
|
||||
- Very slow first search
|
||||
- Poor user experience
|
||||
- Incomplete index
|
||||
- Can't pre-compute embeddings
|
||||
|
||||
**Decision**: Rejected; background indexing is essential for semantic search
|
||||
|
||||
### Alternative 5: Nextcloud App Tokens
|
||||
|
||||
**Approach**: Generate app-specific passwords for each user
|
||||
|
||||
**Pros**:
|
||||
- Nextcloud-native feature
|
||||
- User-controlled revocation
|
||||
- Scoped per-application
|
||||
|
||||
**Cons**:
|
||||
- Requires user interaction to create
|
||||
- May not support programmatic creation
|
||||
- Still requires secure storage
|
||||
- Not standard OAuth
|
||||
|
||||
**Decision**: Rejected; not automatable for background worker
|
||||
|
||||
## Related Decisions
|
||||
|
||||
- ADR-001: Enhanced Note Search (establishes need for vector search)
|
||||
- [Future] ADR-003: Vector Database Selection
|
||||
- [Future] ADR-004: Embedding Model Strategy
|
||||
|
||||
## References
|
||||
|
||||
- [RFC 8693: OAuth 2.0 Token Exchange](https://datatracker.ietf.org/doc/html/rfc8693)
|
||||
- [RFC 6749: OAuth 2.0 - Refresh Tokens](https://datatracker.ietf.org/doc/html/rfc6749#section-1.5)
|
||||
- [OpenID Connect Core - Offline Access](https://openid.net/specs/openid-connect-core-1_0.html#OfflineAccess)
|
||||
- [OWASP: OAuth Security Cheat Sheet](https://cheatsheetseries.owasp.org/cheatsheets/OAuth2_Cheat_Sheet.html)
|
||||
- [RFC 8707: Resource Indicators for OAuth 2.0](https://datatracker.ietf.org/doc/html/rfc8707)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,441 @@
|
||||
# Ollama Capacity Analysis: ollama.internal.coutinho.io
|
||||
|
||||
**Date**: 2025-10-30
|
||||
**Model**: nomic-embed-text:latest
|
||||
**Test Location**: From nextcloud-mcp-server host
|
||||
|
||||
## Summary
|
||||
|
||||
✅ **Ollama instance is operational and performing well**
|
||||
- Embedding generation working correctly
|
||||
- Reasonable latency for small-medium workloads
|
||||
- Good parallelism support
|
||||
- Suitable for development and small production deployments
|
||||
|
||||
## Test Results
|
||||
|
||||
### Model Configuration
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"dimensions": 768,
|
||||
"status": "operational"
|
||||
}
|
||||
```
|
||||
|
||||
### Performance Metrics
|
||||
|
||||
#### 1. Single Embedding Latency
|
||||
|
||||
**Result**: ~553ms per embedding
|
||||
- **Total time**: 0.553 seconds
|
||||
- **Includes**: Network + processing + model inference
|
||||
- **Quality**: Full 768-dimensional vector
|
||||
|
||||
**Analysis**:
|
||||
- Higher than bare-metal benchmarks (~100ms) due to network latency
|
||||
- Acceptable for interactive search queries
|
||||
- Within expected range for remote Ollama instance
|
||||
|
||||
#### 2. Batch Processing (5 items)
|
||||
|
||||
**Result**: ~1.02 seconds for 5 embeddings
|
||||
- **Per-item average**: 204ms
|
||||
- **Throughput**: ~4.9 embeddings/sec
|
||||
- **Batch efficiency**: 2.7x faster than sequential
|
||||
|
||||
**Analysis**:
|
||||
- Good batching efficiency (2.7x speedup vs 5x theoretical)
|
||||
- Optimal for background indexing
|
||||
- Network overhead amortized across batch
|
||||
|
||||
#### 3. Batch Processing (20 items)
|
||||
|
||||
**Result**: ~6.71 seconds for 20 embeddings
|
||||
- **Per-item average**: 336ms
|
||||
- **Throughput**: ~3.0 embeddings/sec
|
||||
- **Batch efficiency**: 1.65x faster than sequential
|
||||
|
||||
**Analysis**:
|
||||
- Performance degrades slightly with larger batches
|
||||
- Still faster than sequential processing
|
||||
- Matches reported Ollama behavior (quality issues at batch >16)
|
||||
- **Recommendation**: Keep batch size ≤16 for best quality
|
||||
|
||||
#### 4. Concurrent Requests (5 parallel)
|
||||
|
||||
**Result**: ~1.27 seconds for 5 parallel requests
|
||||
- **Effective parallelism**: ~2.2x speedup (1.27s vs ~2.77s for 5 sequential requests at 553ms each)
|
||||
- **Per-request average**: 254ms
|
||||
- **Throughput**: ~3.9 requests/sec
|
||||
|
||||
**Analysis**:
|
||||
- Excellent parallelism support
|
||||
- Server handles concurrent requests efficiently
|
||||
- Network and compute overlap effectively
|
||||
- Good for multi-user scenarios
|
||||
|
||||
## Capacity Planning
|
||||
|
||||
### Current Performance Profile
|
||||
|
||||
| Metric | Value | Rating |
|
||||
|--------|-------|--------|
|
||||
| Single embedding latency | 553ms | ⚠️ Moderate |
|
||||
| Batch (5) throughput | 4.9/sec | ✅ Good |
|
||||
| Batch (20) throughput | 3.0/sec | ⚠️ Moderate |
|
||||
| Concurrent throughput | 3.9/sec | ✅ Good |
|
||||
| Network latency | ~300-400ms | ⚠️ Significant |
|
||||
|
||||
### Bottleneck Analysis
|
||||
|
||||
**Primary Bottleneck**: Network latency (~300-400ms per request)
|
||||
- Model inference: ~100-200ms (estimated)
|
||||
- Network round-trip: ~300-400ms (measured overhead)
|
||||
- **Impact**: 60-70% of total latency is network
|
||||
|
||||
**Secondary Bottleneck**: CPU/GPU capacity (unknown hardware)
|
||||
- Batch performance degrades at >16 items
|
||||
- Suggests resource constraints
|
||||
- Likely CPU-only (no GPU metrics available)
|
||||
|
||||
### Recommended Usage Patterns
|
||||
|
||||
#### ✅ **Excellent For:**
|
||||
|
||||
**1. Background Indexing**
|
||||
- Use batch size of 10-15 items
|
||||
- Expected throughput: 3-5 embeddings/sec
|
||||
- **10,000 notes**: ~30-55 minutes to index
|
||||
- **1,000 notes**: ~3-5 minutes to index
|
||||
|
||||
**2. Interactive Search**
|
||||
- Single query embedding: ~550ms
|
||||
- Acceptable for user-facing search
|
||||
- Add 100-200ms for vector search + verification
|
||||
- **Total search time**: ~650-750ms (reasonable UX)
|
||||
|
||||
**3. Multi-User Development**
|
||||
- 5-10 concurrent users: Comfortable
|
||||
- Good parallelism support
|
||||
- Network latency dominates (shared)
|
||||
|
||||
#### ⚠️ **Consider Alternatives For:**
|
||||
|
||||
**1. Real-Time Applications**
|
||||
- Sub-100ms latency requirements
|
||||
- High-frequency queries (>10/sec sustained)
|
||||
- Consider: Local embeddings or Infinity
|
||||
|
||||
**2. Large-Scale Batch Processing**
|
||||
- >100,000 documents to index
|
||||
- >10 embeddings/sec sustained
|
||||
- Consider: GPU-accelerated TEI
|
||||
|
||||
**3. Production with >50 Users**
|
||||
- High concurrent load
|
||||
- Latency sensitivity
|
||||
- Consider: Dedicated embedding service
|
||||
|
||||
### Deployment Scenarios
|
||||
|
||||
#### Scenario 1: Development Environment
|
||||
|
||||
**Profile**:
|
||||
- 1-3 developers
|
||||
- 1,000-5,000 notes total
|
||||
- Occasional searches/indexing
|
||||
|
||||
**Verdict**: ✅ **Perfect fit**
|
||||
- Initial index: ~5-25 minutes (one-time; ~4-5 min per 1,000 notes at 3-4 embeddings/sec)
|
||||
- Incremental updates: <1 minute
|
||||
- Search latency: Acceptable
|
||||
- No infrastructure changes needed
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_INTERVAL=600 # 10 minutes
|
||||
VECTOR_SYNC_BATCH_SIZE=10
|
||||
```
|
||||
|
||||
#### Scenario 2: Small Production (10-20 users)
|
||||
|
||||
**Profile**:
|
||||
- 10-20 active users
|
||||
- 10,000-50,000 notes total
|
||||
- 50-200 searches/day
|
||||
- Nightly incremental indexing
|
||||
|
||||
**Verdict**: ✅ **Suitable with optimizations**
|
||||
- Initial index: ~45 minutes (10,000 notes) to ~4.5 hours (50,000 notes); run overnight
|
||||
- Incremental: 5-15 minutes/night
|
||||
- Search: Acceptable for most users
|
||||
- Monitor network latency
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_INTERVAL=86400 # Daily at night
|
||||
VECTOR_SYNC_BATCH_SIZE=12 # Conservative for quality
|
||||
SEARCH_TIMEOUT_MS=1000 # Account for 550ms latency
|
||||
```
|
||||
|
||||
**Optimizations**:
|
||||
- Run sync during off-hours
|
||||
- Cache query embeddings (common searches)
|
||||
- Use hybrid search (keyword + semantic)
|
||||
|
||||
#### Scenario 3: Medium Production (50-100 users)
|
||||
|
||||
**Profile**:
|
||||
- 50-100 active users
|
||||
- 100,000+ notes
|
||||
- 500-1000 searches/day
|
||||
- Real-time indexing desired
|
||||
|
||||
**Verdict**: ⚠️ **Marginal - monitor closely**
|
||||
- Initial index: 5-10 hours
|
||||
- Search latency: May feel slow for some users
|
||||
- Concurrent load: Approaching limits
|
||||
- **Recommendation**: Plan migration to Infinity
|
||||
|
||||
**Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_INTERVAL=3600 # Hourly
|
||||
VECTOR_SYNC_BATCH_SIZE=10
|
||||
SEMANTIC_WEIGHT=0.5 # Rely more on keyword search
|
||||
SEARCH_TIMEOUT_MS=2000 # Generous timeout
|
||||
```
|
||||
|
||||
**Migration Path**:
|
||||
- Start with Ollama
|
||||
- Monitor latency metrics
|
||||
- When p95 latency >1s, migrate to Infinity
|
||||
- Keep Ollama as fallback
|
||||
|
||||
#### Scenario 4: Large Production (>100 users)
|
||||
|
||||
**Profile**:
|
||||
- >100 active users
|
||||
- >500,000 notes
|
||||
- >1000 searches/day
|
||||
- Real-time expectations
|
||||
|
||||
**Verdict**: ❌ **Not recommended**
|
||||
- Latency too high for scale
|
||||
- Throughput insufficient
|
||||
- Network becomes bottleneck
|
||||
- **Recommendation**: Use Infinity or TEI from start
|
||||
|
||||
## Network Latency Optimization
|
||||
|
||||
### Current Overhead: ~300-400ms
|
||||
|
||||
**If MCP server runs closer to Ollama**:
|
||||
```
|
||||
Same VPC/network: ~1-5ms (300-400ms savings!)
|
||||
Same host: <1ms (300-400ms savings!)
|
||||
```
|
||||
|
||||
### Recommendation
|
||||
|
||||
**Option A: Co-locate MCP server with Ollama**
|
||||
- Reduces latency from 550ms → 150-200ms
|
||||
- ~2.7-3.7x improvement
|
||||
- Makes Ollama competitive with cloud APIs
|
||||
|
||||
**Option B: Keep separate (current)**
|
||||
- Simpler deployment
|
||||
- Better security isolation
|
||||
- Accept 550ms latency
|
||||
|
||||
**Option C: Add Infinity container to MCP server**
|
||||
- Best of both worlds
|
||||
- Use Infinity for speed (local)
|
||||
- Fallback to Ollama if needed
|
||||
|
||||
## Capacity Estimates
|
||||
|
||||
### Indexing Capacity
|
||||
|
||||
**Sustained Throughput**: 3-4 embeddings/sec (conservative)
|
||||
|
||||
| Document Count | Index Time | Notes |
|
||||
|----------------|------------|-------|
|
||||
| 1,000 | 4-5 min | Quick |
|
||||
| 5,000 | 20-25 min | Reasonable |
|
||||
| 10,000 | 40-50 min | Acceptable |
|
||||
| 50,000 | 3.5-4.5 hours | Overnight job |
|
||||
| 100,000 | 7-9 hours | Long batch |
|
||||
| 500,000 | 35-45 hours | Not recommended |
|
||||
|
||||
**Incremental Updates** (10% change daily):
|
||||
- 1,000 docs: ~30 sec
|
||||
- 10,000 docs: ~5 min
|
||||
- 50,000 docs: ~25 min
|
||||
|
||||
### Search Capacity
|
||||
|
||||
**Query Latency Budget**:
|
||||
- Embedding: 550ms
|
||||
- Vector search: 50-100ms
|
||||
- Permission verification: 50-100ms
|
||||
- **Total**: 650-750ms
|
||||
|
||||
**Concurrent Users** (assuming 1 search every 5 minutes):
|
||||
- 10 users: 2 queries/min → Comfortable
|
||||
- 50 users: 10 queries/min → Near limit
|
||||
- 100 users: 20 queries/min → Over capacity
|
||||
|
||||
**Peak Load** (all users search at once):
|
||||
- Parallelism: ~4 concurrent
|
||||
- Queue time: Proportional to position
|
||||
- 10 simultaneous: ~1.5-2 sec for last user
|
||||
- 50 simultaneous: ~7-10 sec for last user
|
||||
|
||||
## Recommendations
|
||||
|
||||
### Immediate Actions (Development)
|
||||
|
||||
1. **✅ Use Ollama as-is**
|
||||
- Current setup is perfect for dev/testing
|
||||
- No changes needed
|
||||
- Start building semantic search
|
||||
|
||||
2. **Configuration**:
|
||||
```bash
|
||||
OLLAMA_URL=https://ollama.internal.coutinho.io
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
VECTOR_SYNC_BATCH_SIZE=10
|
||||
```
|
||||
|
||||
3. **Add Monitoring**:
|
||||
```text
|
||||
# Track these metrics
|
||||
- embedding_latency_seconds (histogram)
|
||||
- embedding_batch_size (gauge)
|
||||
- embedding_errors_total (counter)
|
||||
```
|
||||
|
||||
### Short-Term (Small Production)
|
||||
|
||||
1. **Optimize Batching**:
|
||||
- Use batch size 10-12 (quality sweet spot)
|
||||
- Process during off-hours
|
||||
- Implement incremental sync
|
||||
|
||||
2. **Add Caching**:
|
||||
```python
|
||||
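# Cache common query embeddings (caveat: functools.lru_cache on an async def caches the coroutine object, which can be awaited only once; use an async-aware cache that stores the awaited result in real code)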
|
||||
@lru_cache(maxsize=1000)
|
||||
async def embed_with_cache(query: str):
|
||||
return await ollama.embed(query)
|
||||
```
|
||||
|
||||
3. **Monitor Metrics**:
|
||||
- P50, P95, P99 latency
|
||||
- Throughput (embeddings/sec)
|
||||
- Error rates
|
||||
|
||||
### Medium-Term (If Scaling Up)
|
||||
|
||||
1. **Add Infinity Container** (when >50 users or latency issues):
|
||||
```yaml
|
||||
services:
|
||||
infinity:
|
||||
image: michaelf34/infinity:latest
|
||||
# Local to MCP server - ~10-20ms latency
|
||||
```
|
||||
|
||||
2. **Implement Tiered Fallback**:
|
||||
```
|
||||
Infinity (local, fast) → Ollama (remote, slower) → Local model
|
||||
```
|
||||
|
||||
3. **Load Testing**:
|
||||
- Simulate 50-100 concurrent users
|
||||
- Measure actual throughput limits
|
||||
- Identify breaking points
|
||||
|
||||
### Long-Term (Enterprise Scale)
|
||||
|
||||
1. **Migrate to TEI Cluster** (when >100 users):
|
||||
- GPU-accelerated
|
||||
- Horizontal scaling
|
||||
- <20ms latency
|
||||
|
||||
2. **Consider Managed Services**:
|
||||
- Pinecone, Qdrant Cloud
|
||||
- Removes operational burden
|
||||
- Better SLAs
|
||||
|
||||
## Testing Recommendations
|
||||
|
||||
### Load Testing Script
|
||||
|
||||
```bash
|
||||
# Test sustained load
|
||||
for i in {1..100}; do
|
||||
curl -s https://ollama.internal.coutinho.io/api/embed \
|
||||
-d "{\"model\": \"nomic-embed-text\", \"input\": \"Test $i\"}" &
|
||||
|
||||
# Rate limit: 5 concurrent
|
||||
if [ $(($i % 5)) -eq 0 ]; then
|
||||
wait
|
||||
sleep 1
|
||||
fi
|
||||
done
|
||||
```
|
||||
|
||||
### Metrics to Collect
|
||||
|
||||
1. **Latency Distribution**:
|
||||
- P50 (median)
|
||||
- P95 (acceptable)
|
||||
- P99 (outliers)
|
||||
|
||||
2. **Throughput**:
|
||||
- Embeddings/second
|
||||
- Peak vs sustained
|
||||
|
||||
3. **Error Rates**:
|
||||
- Timeouts
|
||||
- Server errors
|
||||
- Quality issues
|
||||
|
||||
## Conclusion
|
||||
|
||||
**Your Ollama instance is ready for development and small production use!**
|
||||
|
||||
**Current Capacity**:
|
||||
- ✅ Development: Unlimited
|
||||
- ✅ Small prod (10-20 users, 10k docs): Comfortable
|
||||
- ⚠️ Medium prod (50 users, 50k docs): Monitoring needed
|
||||
- ❌ Large prod (>100 users): Migrate to Infinity/TEI
|
||||
|
||||
**Key Strengths**:
|
||||
- Fully operational
|
||||
- Good parallelism
|
||||
- Acceptable latency for most use cases
|
||||
- Easy to integrate
|
||||
|
||||
**Key Limitations**:
|
||||
- Network latency adds 300-400ms overhead
|
||||
- Batch quality issues at >16 items
|
||||
- Limited scalability beyond 50 users
|
||||
|
||||
**Recommendation**:
|
||||
Start using Ollama immediately for development. Add monitoring and plan for Infinity when you approach 50 users or experience latency issues. The abstraction layer in ADR-003 makes migration seamless.
|
||||
|
||||
**Next Steps**:
|
||||
1. Configure MCP server with Ollama URL
|
||||
2. Implement semantic search tools
|
||||
3. Add basic monitoring
|
||||
4. Test with real workload
|
||||
5. Scale up as needed
|
||||
@@ -0,0 +1,796 @@
|
||||
# Ollama Embeddings Investigation
|
||||
|
||||
**Date**: 2025-10-30
|
||||
**Status**: Recommendation for Integration
|
||||
|
||||
## Executive Summary
|
||||
|
||||
Ollama provides a **local, self-hosted embedding solution** that is excellent for **development and small-scale deployments** but has **performance limitations** compared to specialized embedding inference engines (TEI, Infinity).
|
||||
|
||||
**Recommendation**: Include Ollama as **Tier 2 fallback** in our embedding strategy (after cloud APIs, before local sentence-transformers), prioritizing ease of setup over maximum performance.
|
||||
|
||||
## Overview
|
||||
|
||||
Ollama is primarily known as a local LLM runner but added embedding model support in version 0.1.26, making it a convenient option for generating vector embeddings without external API dependencies.
|
||||
|
||||
### Key Characteristics
|
||||
|
||||
- **Local & Self-Hosted**: No external API calls, full privacy
|
||||
- **Easy Setup**: Single binary, simple model downloads (`ollama pull nomic-embed-text`)
|
||||
- **Unified Platform**: Same tool for both LLMs and embeddings
|
||||
- **OpenAI Compatible**: `/v1/embeddings` endpoint for drop-in replacement
|
||||
- **Multi-Platform**: Linux, macOS, Windows support
|
||||
- **GPU Support**: CUDA, ROCm, Metal acceleration
|
||||
|
||||
## API Details
|
||||
|
||||
### Endpoint Structure
|
||||
|
||||
**New API** (recommended):
|
||||
```bash
|
||||
POST http://localhost:11434/api/embed
|
||||
```
|
||||
|
||||
**OpenAI Compatible**:
|
||||
```bash
|
||||
POST http://localhost:11434/v1/embeddings
|
||||
```
|
||||
|
||||
**Legacy API** (deprecated):
|
||||
```bash
|
||||
POST http://localhost:11434/api/embeddings
|
||||
```
|
||||
|
||||
### Request Format
|
||||
|
||||
**Single Text Embedding**:
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"input": "Text to embed"
|
||||
}
|
||||
```
|
||||
|
||||
**Batch Embedding** (since v0.2.0):
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"input": [
|
||||
"First text to embed",
|
||||
"Second text to embed",
|
||||
"Third text to embed"
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Response Format
|
||||
|
||||
```json
|
||||
{
|
||||
"model": "nomic-embed-text",
|
||||
"embeddings": [
|
||||
[0.123, -0.456, 0.789, ...], // 768 dimensions for nomic-embed-text
|
||||
[0.234, -0.567, 0.890, ...]
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Python Integration
|
||||
|
||||
```python
|
||||
import ollama
|
||||
|
||||
# Single embedding
|
||||
response = ollama.embed(
|
||||
model='nomic-embed-text',
|
||||
input='Text to embed'
|
||||
)
|
||||
embedding = response['embeddings'][0]
|
||||
|
||||
# Batch embeddings (more efficient)
|
||||
response = ollama.embed(
|
||||
model='nomic-embed-text',
|
||||
input=[
|
||||
'First text',
|
||||
'Second text',
|
||||
'Third text'
|
||||
]
|
||||
)
|
||||
embeddings = response['embeddings']
|
||||
```
|
||||
|
||||
## Available Models
|
||||
|
||||
### 1. nomic-embed-text (Recommended)
|
||||
|
||||
**Specifications**:
|
||||
- **Parameters**: 137M
|
||||
- **Dimensions**: 768
|
||||
- **Context Length**: 8,192 tokens (2K effective)
|
||||
- **Size**: 274MB
|
||||
- **Architecture**: BERT-based
|
||||
|
||||
**Performance**:
|
||||
- Outperforms OpenAI `text-embedding-ada-002` and `text-embedding-3-small`
|
||||
- Excellent for long-context tasks
|
||||
- Strong general-purpose performance
|
||||
|
||||
**Use Cases**:
|
||||
- General RAG applications
|
||||
- Long document processing
|
||||
- Semantic search
|
||||
- Document clustering
|
||||
|
||||
**Pull Command**:
|
||||
```bash
|
||||
ollama pull nomic-embed-text
|
||||
```
|
||||
|
||||
### 2. mxbai-embed-large
|
||||
|
||||
**Specifications**:
|
||||
- **Parameters**: 334M
|
||||
- **Dimensions**: 1,024
|
||||
- **Context Length**: 512 tokens
|
||||
- **Architecture**: BERT-large optimized
|
||||
|
||||
**Performance**:
|
||||
- Claims to outperform commercial models
|
||||
- Higher precision for complex queries
|
||||
- Best quality but slower
|
||||
|
||||
**Use Cases**:
|
||||
- High-precision semantic search
|
||||
- Enterprise knowledge bases
|
||||
- Multilingual content
|
||||
|
||||
**Pull Command**:
|
||||
```bash
|
||||
ollama pull mxbai-embed-large
|
||||
```
|
||||
|
||||
### 3. all-minilm
|
||||
|
||||
**Specifications**:
|
||||
- **Parameters**: 23M
|
||||
- **Dimensions**: 384
|
||||
- **Context Length**: 256 tokens
|
||||
- **Size**: Smallest footprint
|
||||
|
||||
**Performance**:
|
||||
- Fastest processing speed
|
||||
- Good for sentence-level tasks
|
||||
- Limited context window
|
||||
|
||||
**Use Cases**:
|
||||
- Real-time applications
|
||||
- Resource-constrained environments
|
||||
- High-throughput scenarios
|
||||
- Development/testing
|
||||
|
||||
**Pull Command**:
|
||||
```bash
|
||||
ollama pull all-minilm
|
||||
```
|
||||
|
||||
## Performance Benchmarks
|
||||
|
||||
### Throughput Comparison
|
||||
|
||||
| Hardware | Model | Batch Size | Throughput | Notes |
|
||||
|----------|-------|------------|------------|-------|
|
||||
| RTX 4090 (24GB) | nomic-embed-text | 256 | 12,450 tok/sec | GPU-accelerated |
|
||||
| RTX 4090 (24GB) | mxbai-embed-large | 128 | 8,920 tok/sec | GPU-accelerated |
|
||||
| Intel i9-13900K (CPU) | nomic-embed-text | 32 | 3,250 tok/sec | CPU-only |
|
||||
| Intel i9-13900K (CPU) | mxbai-embed-large | 16 | 2,180 tok/sec | CPU-only |
|
||||
|
||||
### Latency Comparison
|
||||
|
||||
**Single Request Latency** (RTX 4060):
|
||||
- Ollama: ~99ms
|
||||
- TEI: ~20ms (5x faster)
|
||||
- Infinity: ~30-40ms (2.5-3x faster)
|
||||
|
||||
**Batch Processing**:
|
||||
- Optimal batch size: 32-64 (model dependent)
|
||||
- Embedding quality reportedly degrades with batches >16, so the throughput-optimal size above may need to be capped at 16 when quality matters
|
||||
- 2x slower than direct sentence-transformers usage
|
||||
|
||||
### Engine Comparison
|
||||
|
||||
Based on benchmarks from Baseten (2024):
|
||||
|
||||
| Engine | Relative Throughput | Notes |
|
||||
|--------|---------------------|-------|
|
||||
| BEI | 9.0x | Fastest (proprietary) |
|
||||
| TEI | 4.5x | Open source, Rust-based |
|
||||
| Infinity | 3.5x | PyTorch/ONNX optimized |
|
||||
| vLLM | 3.0x | General LLM inference |
|
||||
| **Ollama** | **1.0x** | Slowest for embeddings |
|
||||
|
||||
**Key Insight**: Ollama is **3.5-9x slower** than specialized embedding engines (Infinity 3.5x, TEI 4.5x, BEI 9x) but trades performance for ease of use and unified platform.
|
||||
|
||||
## Integration Implementation
|
||||
|
||||
### Python Client Wrapper
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/embeddings/ollama.py
|
||||
import httpx
|
||||
from typing import List
|
||||
|
||||
|
||||
class OllamaEmbedding:
    """Ollama embedding provider.

    Async client for an Ollama server's ``/api/embed`` (embedding generation)
    and ``/api/tags`` (installed-model listing) endpoints. Each instance owns
    one ``httpx.AsyncClient``; call :meth:`close` when the provider is no
    longer needed.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:11434",
        model: str = "nomic-embed-text"
    ):
        """Create a provider for ``model`` served at ``base_url``.

        Args:
            base_url: Root URL of the Ollama server; a trailing slash is
                stripped so endpoint paths can be appended directly.
            model: Name of the embedding model to request.
        """
        self.base_url = base_url.rstrip("/")
        self.model = model
        self.client = httpx.AsyncClient(timeout=60.0)

        # Model dimension mapping
        self.dimensions = {
            "nomic-embed-text": 768,
            "mxbai-embed-large": 1024,
            "all-minilm": 384
        }
        # Models missing from the mapping are assumed to produce 768 values.
        self.dimension = self.dimensions.get(model, 768)

    @staticmethod
    def _with_default_tag(name: str) -> str:
        """Return ``name`` with an explicit ``:latest`` tag when it has none."""
        # Only the last path segment can carry a tag; a ":" earlier in the
        # name belongs to a registry "host:port" prefix.
        if ":" in name.rsplit("/", 1)[-1]:
            return name
        return f"{name}:latest"

    async def embed(self, text: str) -> List[float]:
        """Generate embedding for single text.

        Raises:
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        response = await self.client.post(
            f"{self.base_url}/api/embed",
            json={
                "model": self.model,
                "input": text
            }
        )
        response.raise_for_status()
        data = response.json()
        # The endpoint returns a list of vectors even for a single input.
        return data["embeddings"][0]

    async def embed_batch(
        self,
        texts: List[str],
        batch_size: int = 32
    ) -> List[List[float]]:
        """
        Generate embeddings for multiple texts in batches.

        Note: Ollama has reported quality issues with batch sizes >16.
        We use batch_size=32 as default but allow configuration.

        Args:
            texts: Texts to embed; an empty list yields an empty result.
            batch_size: Maximum number of texts sent per request.

        Returns:
            One embedding per input text, in input order.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
            httpx.HTTPStatusError: If the server answers with an error status.
        """
        # A negative step would make range() yield nothing and silently drop
        # every text, so reject invalid sizes up front.
        if batch_size < 1:
            raise ValueError(f"batch_size must be >= 1, got {batch_size}")

        all_embeddings: List[List[float]] = []

        # Process in chunks to avoid batch size issues
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]

            response = await self.client.post(
                f"{self.base_url}/api/embed",
                json={
                    "model": self.model,
                    "input": batch
                }
            )
            response.raise_for_status()
            data = response.json()
            all_embeddings.extend(data["embeddings"])

        return all_embeddings

    async def check_health(self) -> bool:
        """Check if Ollama server is running and model is available.

        Returns:
            ``True`` when the server responds and lists the configured model.

        Raises:
            ConnectionError: If the server cannot be reached, answers with an
                error status, or does not list the configured model. The
                underlying exception is attached as ``__cause__``.
        """
        try:
            # Check if server is up
            response = await self.client.get(f"{self.base_url}/api/tags")
            response.raise_for_status()

            # Check if model is pulled. The listing reports names together
            # with their tag (e.g. "nomic-embed-text:latest"), so both sides
            # are normalized before comparing.
            models = response.json().get("models", [])
            available = {self._with_default_tag(m["name"]) for m in models}

            if self._with_default_tag(self.model) not in available:
                raise ValueError(
                    f"Model '{self.model}' not found. "
                    f"Run: ollama pull {self.model}"
                )

            return True

        except Exception as e:
            raise ConnectionError(f"Ollama health check failed: {e}") from e

    async def close(self):
        """Close HTTP client"""
        await self.client.aclose()
|
||||
```
|
||||
|
||||
### Auto-Detection in Embedding Service
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/embeddings/service.py
|
||||
from typing import Optional
|
||||
import os
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EmbeddingService:
    """Unified embedding service with automatic provider detection.

    The provider is chosen once, at construction time, from environment
    variables, in this order: OpenAI (``OPENAI_API_KEY``), Infinity
    (``INFINITY_URL``), Ollama (``OLLAMA_URL``), then a local model.
    """

    def __init__(self):
        self.provider = None
        self._detect_provider()

    @staticmethod
    def _run_blocking(coro_factory):
        """Run the coroutine built by ``coro_factory`` to completion and return its result.

        ``asyncio.run()`` raises when called from a thread that already has a
        running event loop, so in that case the coroutine is executed on a
        private loop in a short-lived worker thread instead.
        """
        import asyncio
        import concurrent.futures

        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread: asyncio.run() is safe.
            return asyncio.run(coro_factory())

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(lambda: asyncio.run(coro_factory())).result()

    def _detect_provider(self):
        """Auto-detect available embedding provider"""

        # Tier 1: OpenAI API (best quality)
        openai_key = os.getenv("OPENAI_API_KEY")
        if openai_key:
            from .openai import OpenAIEmbedding
            self.provider = OpenAIEmbedding(
                model=os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"),
                api_key=openai_key
            )
            logger.info("✓ Using OpenAI embeddings")
            return

        # Tier 2a: Infinity (optimized self-hosted)
        infinity_url = os.getenv("INFINITY_URL")
        if infinity_url:
            from .infinity import InfinityEmbedding
            try:
                self.provider = InfinityEmbedding(
                    url=infinity_url,
                    model=os.getenv("EMBEDDING_MODEL", "BAAI/bge-small-en-v1.5")
                )
                logger.info("✓ Using Infinity embeddings (optimized)")
                return
            except Exception as e:
                logger.warning(f"Infinity unavailable: {e}")

        # Tier 2b: Ollama (easy self-hosted)
        ollama_url = os.getenv("OLLAMA_URL")
        if ollama_url:
            from .ollama import OllamaEmbedding
            ollama_model = os.getenv("OLLAMA_MODEL", "nomic-embed-text")

            async def _probe():
                # Verify Ollama is running and model is available, using a
                # throwaway provider that is closed on the same loop it ran on.
                # NOTE(review): an async HTTP client first used on a loop that
                # is later closed may not be reusable on another loop —
                # confirm for httpx.
                probe = OllamaEmbedding(base_url=ollama_url, model=ollama_model)
                try:
                    await probe.check_health()
                finally:
                    await probe.close()

            try:
                self._run_blocking(_probe)
                # Only install the provider once the probe has succeeded, so a
                # failed probe never leaves a half-initialized provider behind.
                self.provider = OllamaEmbedding(
                    base_url=ollama_url,
                    model=ollama_model
                )
                logger.info("✓ Using Ollama embeddings (easy setup)")
                return
            except Exception as e:
                logger.warning(f"Ollama unavailable: {e}")

        # Tier 3: Local model (fallback)
        logger.warning("No cloud/hosted embeddings available, using local model")
        from .local import LocalEmbedding
        self.provider = LocalEmbedding(
            model=os.getenv("LOCAL_EMBEDDING_MODEL", "all-MiniLM-L6-v2")
        )
        logger.info("✓ Using local embeddings (CPU fallback)")

    async def embed(self, text: str):
        """Generate embedding for text"""
        return await self.provider.embed(text)

    async def embed_batch(self, texts: list[str]):
        """Generate embeddings for multiple texts"""
        return await self.provider.embed_batch(texts)

    @property
    def dimension(self) -> int:
        """Get embedding dimension"""
        return self.provider.dimension
|
||||
```
|
||||
|
||||
### Docker Compose Configuration
|
||||
|
||||
```yaml
|
||||
services:
|
||||
# Ollama embedding service
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
restart: always
|
||||
ports:
|
||||
- 127.0.0.1:11434:11434
|
||||
volumes:
|
||||
- ollama_models:/root/.ollama
|
||||
# Optional: GPU support
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
# Pull models on startup
|
||||
entrypoint: ["/bin/sh", "-c"]
|
||||
command:
|
||||
- |
|
||||
ollama serve &
|
||||
sleep 5
|
||||
ollama pull nomic-embed-text
|
||||
wait
|
||||
|
||||
# MCP Server with Ollama embeddings
|
||||
mcp:
|
||||
build: .
|
||||
depends_on:
|
||||
- ollama
|
||||
environment:
|
||||
# ... other vars ...
|
||||
- OLLAMA_URL=http://ollama:11434
|
||||
- OLLAMA_MODEL=nomic-embed-text
|
||||
|
||||
# Vector sync worker
|
||||
mcp-vector-sync:
|
||||
build: .
|
||||
command: ["python", "-m", "nextcloud_mcp_server.sync.vector_indexer"]
|
||||
depends_on:
|
||||
- ollama
|
||||
- qdrant
|
||||
environment:
|
||||
# ... other vars ...
|
||||
- OLLAMA_URL=http://ollama:11434
|
||||
- OLLAMA_MODEL=nomic-embed-text
|
||||
|
||||
volumes:
|
||||
ollama_models:
|
||||
```
|
||||
|
||||
## Advantages of Ollama
|
||||
|
||||
### 1. **Ease of Setup**
|
||||
|
||||
```bash
|
||||
# Install Ollama
|
||||
curl -fsSL https://ollama.com/install.sh | sh
|
||||
|
||||
# Pull embedding model
|
||||
ollama pull nomic-embed-text
|
||||
|
||||
# Done! API available at localhost:11434
|
||||
```
|
||||
|
||||
No complex configuration, no Docker registries, no model conversion.
|
||||
|
||||
### 2. **Privacy & Data Sovereignty**
|
||||
|
||||
- All processing happens locally
|
||||
- No data leaves your infrastructure
|
||||
- No API keys or external dependencies
|
||||
- Ideal for sensitive content (medical, legal, financial)
|
||||
|
||||
### 3. **Unified Platform**
|
||||
|
||||
- Same tool for LLMs and embeddings
|
||||
- Consistent API across model types
|
||||
- Single point of management
|
||||
- Simplified operations
|
||||
|
||||
### 4. **Developer Experience**
|
||||
|
||||
- Simple API (similar to OpenAI)
|
||||
- Good documentation
|
||||
- Active community
|
||||
- Framework integrations (LangChain, LlamaIndex)
|
||||
|
||||
### 5. **Cost**
|
||||
|
||||
- Free and open source
|
||||
- No per-token API costs
|
||||
- Only infrastructure costs (compute)
|
||||
|
||||
### 6. **Model Variety**
|
||||
|
||||
Growing library of embedding models:
|
||||
- nomic-embed-text (general purpose)
|
||||
- mxbai-embed-large (high quality)
|
||||
- all-minilm (fast)
|
||||
- More models added regularly
|
||||
|
||||
## Limitations of Ollama
|
||||
|
||||
### 1. **Performance**
|
||||
|
||||
- **3.5-4.5x slower** than specialized open-source engines (TEI, Infinity), per the engine comparison table
|
||||
- Not optimized specifically for embedding inference
|
||||
- Batch processing issues at larger batch sizes (>16)
|
||||
- Higher latency compared to alternatives
|
||||
|
||||
### 2. **Scalability**
|
||||
|
||||
- Single-instance deployment (no native clustering)
|
||||
- Limited concurrent request handling
|
||||
- Not designed for high-throughput production
|
||||
- Resource usage per request is higher
|
||||
|
||||
### 3. **Batch Processing Issues**
|
||||
|
||||
- Quality degradation reported with large batches
|
||||
- Optimal batch size: 32-64 (conservative)
|
||||
- Less efficient than specialized engines
|
||||
- GitHub issues tracking batch problems (#6262)
|
||||
|
||||
### 4. **Resource Usage**
|
||||
|
||||
- Models stay loaded in memory (VRAM/RAM)
|
||||
- Higher memory footprint per model
|
||||
- GPU context switching overhead
|
||||
- Not as memory-efficient as specialized engines
|
||||
|
||||
### 5. **Production Features**
|
||||
|
||||
- No built-in load balancing
|
||||
- Limited monitoring/metrics
|
||||
- No automatic scaling
|
||||
- Basic error handling
|
||||
|
||||
## Use Case Recommendations
|
||||
|
||||
### ✅ **Excellent For:**
|
||||
|
||||
1. **Development & Testing**
|
||||
- Quick setup for prototyping
|
||||
- Local development environments
|
||||
- Testing embedding pipelines
|
||||
|
||||
2. **Small Deployments**
|
||||
- <10 users
|
||||
- <10,000 documents
|
||||
- Infrequent searches (<100/day)
|
||||
- Hobbyist/personal projects
|
||||
|
||||
3. **Privacy-Critical Applications**
|
||||
- Medical/healthcare records
|
||||
- Legal documents
|
||||
- Financial data
|
||||
- Air-gapped environments
|
||||
|
||||
4. **Unified LLM Stack**
|
||||
- Projects already using Ollama for LLMs
|
||||
- Simplified operations
|
||||
- Consistent tooling
|
||||
|
||||
5. **Educational/Learning**
|
||||
- Teaching RAG concepts
|
||||
- Learning embeddings
|
||||
- Hackathons/workshops
|
||||
|
||||
### ⚠️ **Consider Alternatives For:**
|
||||
|
||||
1. **Production at Scale**
|
||||
- >100 users
|
||||
- >100,000 documents
|
||||
- High query volume (>1000/day)
|
||||
- Use: TEI or Infinity
|
||||
|
||||
2. **Performance-Critical**
|
||||
- Real-time search (<50ms latency)
|
||||
- High-throughput batch processing
|
||||
- Use: TEI with GPU
|
||||
|
||||
3. **Enterprise Deployments**
|
||||
- Need for high availability
|
||||
- Load balancing requirements
|
||||
- Advanced monitoring
|
||||
- Use: Managed services or TEI cluster
|
||||
|
||||
4. **Large-Scale Indexing**
|
||||
- Millions of documents
|
||||
- Continuous high-volume ingestion
|
||||
- Use: Infinity or commercial solutions
|
||||
|
||||
## Integration Strategy
|
||||
|
||||
### Recommended Tier Placement
|
||||
|
||||
**Update ADR-003 embedding strategy:**
|
||||
|
||||
```
|
||||
Tier 1: OpenAI API (best quality, requires API key)
|
||||
↓ fallback
|
||||
Tier 2a: Infinity (optimized self-hosted, complex setup)
|
||||
↓ fallback
|
||||
Tier 2b: Ollama (easy self-hosted, moderate performance) ← NEW
|
||||
↓ fallback
|
||||
Tier 3: Local sentence-transformers (CPU fallback, simplest)
|
||||
```
|
||||
|
||||
### Configuration
|
||||
|
||||
```bash
|
||||
# Option 1: Use Infinity (if available)
|
||||
INFINITY_URL=http://infinity:7997
|
||||
EMBEDDING_MODEL=BAAI/bge-small-en-v1.5
|
||||
|
||||
# Option 2: Use Ollama (if Infinity unavailable)
|
||||
OLLAMA_URL=http://ollama:11434
|
||||
OLLAMA_MODEL=nomic-embed-text
|
||||
|
||||
# Option 3: Use local model (automatic fallback)
|
||||
# No configuration needed
|
||||
```
|
||||
|
||||
### When to Choose Ollama
|
||||
|
||||
**Choose Ollama if**:
|
||||
- You're already using Ollama for LLMs
|
||||
- You need privacy/data sovereignty
|
||||
- You have <10k documents and <100 users
|
||||
- Ease of setup is more important than max performance
|
||||
- You're in development/testing phase
|
||||
|
||||
**Choose Infinity/TEI if**:
|
||||
- You need maximum throughput (>1000 embeddings/sec)
|
||||
- You have >100k documents
|
||||
- Latency is critical (<50ms)
|
||||
- You're in production with >100 users
|
||||
|
||||
**Choose OpenAI API if**:
|
||||
- You're okay with cloud dependencies
|
||||
- You need best-in-class quality
|
||||
- Cost is not a concern (~$0.02 per 1M tokens)
|
||||
|
||||
## Production Deployment Guidance
|
||||
|
||||
### Small Production (Ollama Acceptable)
|
||||
|
||||
**Profile**:
|
||||
- 5-20 users
|
||||
- 1,000-10,000 documents
|
||||
- 50-200 searches/day
|
||||
- <2 sec acceptable latency
|
||||
|
||||
**Configuration**:
|
||||
```yaml
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 4GB
|
||||
cpus: "2.0"
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia # GPU if available
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
environment:
|
||||
- OLLAMA_NUM_PARALLEL=2 # Concurrent requests
|
||||
```
|
||||
|
||||
**Expected Performance**:
|
||||
- Embedding latency: 100-200ms
|
||||
- Throughput: 5-10 embeddings/sec
|
||||
- Memory: 2-3GB (model loaded)
|
||||
|
||||
### Medium Production (Use Infinity/TEI)
|
||||
|
||||
**Profile**:
|
||||
- 20-200 users
|
||||
- 10,000-1M documents
|
||||
- 500-5,000 searches/day
|
||||
- <500ms acceptable latency
|
||||
|
||||
**Recommendation**: Migrate to Infinity or TEI
|
||||
```yaml
|
||||
infinity:
|
||||
image: michaelf34/infinity:latest
|
||||
# Better throughput and latency
|
||||
```
|
||||
|
||||
### Large Production (Use Specialized Solution)
|
||||
|
||||
**Profile**:
|
||||
- >200 users
|
||||
- >1M documents
|
||||
- >5,000 searches/day
|
||||
- <100ms required latency
|
||||
|
||||
**Recommendation**: Use TEI cluster or commercial service
|
||||
|
||||
## Monitoring Considerations
|
||||
|
||||
### Key Metrics to Track
|
||||
|
||||
```python
|
||||
# Add Ollama-specific metrics
|
||||
from prometheus_client import Histogram, Counter, Gauge
|
||||
|
||||
# Time spent generating embeddings, labelled by model and batch size.
ollama_embedding_latency = Histogram(
    name='ollama_embedding_duration_seconds',
    documentation='Ollama embedding generation time',
    labelnames=['model', 'batch_size'],
)

# Size of the batch currently being processed.
ollama_batch_size = Gauge(
    name='ollama_batch_size',
    documentation='Current batch size being processed',
)

# Count of embedding failures, labelled by error type.
ollama_errors = Counter(
    name='ollama_errors_total',
    documentation='Ollama embedding errors',
    labelnames=['error_type'],
)
|
||||
```
|
||||
|
||||
### Health Checks
|
||||
|
||||
```python
|
||||
async def ollama_health_check():
    """Check Ollama availability.

    Returns:
        A ``(healthy, message)`` tuple: ``(True, "OK")`` when the server
        responds and lists the ``nomic-embed-text`` model,
        ``(False, "Model not pulled")`` when the model is absent, and
        ``(False, <error text>)`` for any other failure. Never raises.
    """
    wanted = "nomic-embed-text"
    try:
        async with httpx.AsyncClient() as client:
            # Check server
            response = await client.get("http://ollama:11434/api/tags")
            response.raise_for_status()

            # Verify model loaded. The listing reports names with their tag
            # (e.g. "nomic-embed-text:latest"), so accept the untagged name
            # and its implicit ":latest" form.
            models = response.json().get("models", [])
            pulled = {m["name"] for m in models}
            if wanted not in pulled and f"{wanted}:latest" not in pulled:
                return False, "Model not pulled"

            return True, "OK"
    except Exception as e:
        # Deliberately broad: a health probe reports failure instead of raising.
        return False, str(e)
|
||||
```
|
||||
|
||||
## Migration Path
|
||||
|
||||
### Starting with Ollama
|
||||
|
||||
**Phase 1: Development** (Ollama)
|
||||
- Use Ollama for initial development
|
||||
- Validate embedding pipeline
|
||||
- Test search quality
|
||||
|
||||
**Phase 2: Growth** (Ollama → Infinity)
|
||||
- Monitor performance metrics
|
||||
- When >50 users or >10k docs, migrate to Infinity
|
||||
- Simple config change, no code changes
|
||||
|
||||
**Phase 3: Scale** (Infinity → TEI/Commercial)
|
||||
- When >200 users or performance issues
|
||||
- Consider TEI cluster or managed services
|
||||
|
||||
### Code Compatibility
|
||||
|
||||
All embedding providers use the same interface:
|
||||
```python
|
||||
# Works with Ollama, Infinity, OpenAI, Local
|
||||
embedding = await embedding_service.embed(text)
|
||||
embeddings = await embedding_service.embed_batch(texts)
|
||||
```
|
||||
|
||||
**Migration is a configuration change only** - no code rewrite needed.
|
||||
|
||||
## Conclusion
|
||||
|
||||
**Ollama is a solid choice for:**
|
||||
- Early-stage projects
|
||||
- Development/testing
|
||||
- Privacy-critical applications
|
||||
- Small deployments (<10 users, <10k docs)
|
||||
- Unified LLM + embedding stack
|
||||
|
||||
**But recognize its limitations:**
|
||||
- 3-9x slower than the other engines benchmarked (see Engine Comparison)
|
||||
- Not designed for high-throughput production
|
||||
- Batch processing can be problematic
|
||||
- Limited scalability
|
||||
|
||||
**Recommendation**:
|
||||
✅ **Include Ollama as Tier 2b** (after Infinity, before local models) in the embedding strategy. It provides a good balance of ease-of-use and privacy for small-to-medium deployments while allowing seamless migration to more performant engines as needs grow.
|
||||
|
||||
The key is designing the abstraction layer (as done in ADR-003) so migration between engines requires only configuration changes, not code rewrites.
|
||||
+8
-1
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "nextcloud-mcp-server"
|
||||
version = "0.21.0"
|
||||
version = "0.22.7"
|
||||
description = "Model Context Protocol (MCP) server for Nextcloud integration - enables AI assistants to interact with Nextcloud data"
|
||||
authors = [
|
||||
{name = "Chris Coutinho", email = "chris@coutinho.io"}
|
||||
@@ -65,6 +65,13 @@ version_scheme = "pep440"
|
||||
version_provider = "uv"
|
||||
update_changelog_on_bump = true
|
||||
major_version_zero = true
|
||||
version_files = [
|
||||
"charts/nextcloud-mcp-server/Chart.yaml:appVersion",
|
||||
"charts/nextcloud-mcp-server/Chart.yaml:version"
|
||||
]
|
||||
ignored_tag_formats = [
|
||||
"nextcloud-mcp-server-*"
|
||||
]
|
||||
|
||||
[tool.ruff.lint]
|
||||
extend-select = ["I"]
|
||||
|
||||
@@ -3,8 +3,8 @@ Tests for Dynamic Client Registration (DCR) token_type parameter.
|
||||
|
||||
These tests verify that the Nextcloud OIDC server properly honors the token_type
|
||||
parameter during client registration, issuing the correct type of access tokens:
|
||||
- token_type="JWT" → JWT-formatted tokens (RFC 9068)
|
||||
- token_type="Bearer" → Opaque tokens (standard OAuth2)
|
||||
- token_type="jwt" → JWT-formatted tokens (RFC 9068)
|
||||
- token_type="opaque" → Opaque tokens (standard OAuth2)
|
||||
|
||||
This is critical for ensuring:
|
||||
1. Client choice is respected by the OIDC server
|
||||
@@ -208,12 +208,14 @@ async def test_dcr_respects_jwt_token_type(
|
||||
oauth_callback_server,
|
||||
):
|
||||
"""
|
||||
Test that DCR honors token_type=JWT and issues JWT-formatted tokens.
|
||||
Test that DCR honors token_type=jwt and issues JWT-formatted tokens.
|
||||
|
||||
This verifies:
|
||||
1. Client registration with token_type="JWT" succeeds
|
||||
1. Client registration with token_type="jwt" succeeds
|
||||
2. Tokens obtained via this client are JWT format (base64.base64.signature)
|
||||
3. JWT payload contains expected claims (sub, iss, scope, etc.)
|
||||
|
||||
Note: The OIDC app uses lowercase 'jwt' (not 'JWT').
|
||||
"""
|
||||
nextcloud_host = os.getenv("NEXTCLOUD_HOST")
|
||||
if not nextcloud_host:
|
||||
@@ -232,15 +234,15 @@ async def test_dcr_respects_jwt_token_type(
|
||||
token_endpoint = oidc_config.get("token_endpoint")
|
||||
authorization_endpoint = oidc_config.get("authorization_endpoint")
|
||||
|
||||
# Register client with token_type="JWT"
|
||||
logger.info("Registering OAuth client with token_type=JWT...")
|
||||
# Register client with token_type="jwt"
|
||||
logger.info("Registering OAuth client with token_type=jwt...")
|
||||
client_info = await register_client(
|
||||
nextcloud_url=nextcloud_host,
|
||||
registration_endpoint=registration_endpoint,
|
||||
client_name="DCR Test - JWT Token Type",
|
||||
redirect_uris=[callback_url],
|
||||
scopes="openid profile email notes:read notes:write",
|
||||
token_type="JWT",
|
||||
token_type="jwt",
|
||||
)
|
||||
|
||||
logger.info(f"Registered JWT client: {client_info.client_id[:16]}...")
|
||||
@@ -278,7 +280,7 @@ async def test_dcr_respects_jwt_token_type(
|
||||
assert "notes:write" in scopes, "JWT scope claim missing notes:write"
|
||||
|
||||
logger.info(
|
||||
f"✅ DCR with token_type=JWT works correctly! "
|
||||
f"✅ DCR with token_type=jwt works correctly! "
|
||||
f"Token is JWT format with scope claim: {payload['scope']}"
|
||||
)
|
||||
|
||||
@@ -290,12 +292,14 @@ async def test_dcr_respects_bearer_token_type(
|
||||
oauth_callback_server,
|
||||
):
|
||||
"""
|
||||
Test that DCR honors token_type=Bearer and issues opaque tokens.
|
||||
Test that DCR honors token_type=opaque and issues opaque tokens.
|
||||
|
||||
This verifies:
|
||||
1. Client registration with token_type="Bearer" succeeds
|
||||
1. Client registration with token_type="opaque" succeeds
|
||||
2. Tokens obtained via this client are opaque (NOT JWT format)
|
||||
3. Opaque tokens are simple strings, not base64-encoded structures
|
||||
|
||||
Note: The OIDC app uses 'opaque' or 'jwt' as token_type values (not 'Bearer').
|
||||
"""
|
||||
nextcloud_host = os.getenv("NEXTCLOUD_HOST")
|
||||
if not nextcloud_host:
|
||||
@@ -314,18 +318,18 @@ async def test_dcr_respects_bearer_token_type(
|
||||
token_endpoint = oidc_config.get("token_endpoint")
|
||||
authorization_endpoint = oidc_config.get("authorization_endpoint")
|
||||
|
||||
# Register client with token_type="Bearer" (opaque tokens)
|
||||
logger.info("Registering OAuth client with token_type=Bearer...")
|
||||
# Register client with token_type="opaque" (opaque tokens)
|
||||
logger.info("Registering OAuth client with token_type=opaque...")
|
||||
client_info = await register_client(
|
||||
nextcloud_url=nextcloud_host,
|
||||
registration_endpoint=registration_endpoint,
|
||||
client_name="DCR Test - Bearer Token Type",
|
||||
client_name="DCR Test - Opaque Token Type",
|
||||
redirect_uris=[callback_url],
|
||||
scopes="openid profile email notes:read notes:write",
|
||||
token_type="Bearer",
|
||||
token_type="opaque",
|
||||
)
|
||||
|
||||
logger.info(f"Registered Bearer client: {client_info.client_id[:16]}...")
|
||||
logger.info(f"Registered Opaque token client: {client_info.client_id[:16]}...")
|
||||
|
||||
# Obtain token via OAuth flow
|
||||
access_token = await get_oauth_token_with_client(
|
||||
@@ -353,7 +357,7 @@ async def test_dcr_respects_bearer_token_type(
|
||||
pass
|
||||
|
||||
logger.info(
|
||||
f"✅ DCR with token_type=Bearer works correctly! "
|
||||
f"✅ DCR with token_type=opaque works correctly! "
|
||||
f"Token is opaque (not JWT format): {access_token[:30]}..."
|
||||
)
|
||||
|
||||
|
||||
Vendored
+1
-1
Submodule third_party/oidc updated: e4659c79ef...84f31d302f
Reference in New Issue
Block a user