Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 050b4d2eeb |
@@ -1,89 +0,0 @@
|
||||
name: Build and Publish Astrolabe App Release
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- 'astrolabe-v*'
|
||||
|
||||
env:
|
||||
APP_NAME: astrolabe
|
||||
APP_DIR: third_party/astrolabe
|
||||
|
||||
jobs:
|
||||
build-and-publish:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
|
||||
- name: Get version from tag
|
||||
id: tag
|
||||
run: |
|
||||
echo "TAG=${GITHUB_REF#refs/tags/astrolabe-v}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Validate version in info.xml matches tag
|
||||
working-directory: ${{ env.APP_DIR }}
|
||||
run: |
|
||||
INFO_VERSION=$(sed -n 's/.*<version>\(.*\)<\/version>.*/\1/p' appinfo/info.xml | tr -d '\t')
|
||||
if [ "$INFO_VERSION" != "${{ steps.tag.outputs.TAG }}" ]; then
|
||||
echo "Version mismatch: info.xml has $INFO_VERSION but tag is ${{ steps.tag.outputs.TAG }}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Version validated: $INFO_VERSION"
|
||||
|
||||
- name: Setup Node
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4
|
||||
with:
|
||||
node-version: 22
|
||||
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@44454db4f0199b8b9685a5d763dc37cbf79108e1 # v2
|
||||
with:
|
||||
php-version: 8.1
|
||||
coverage: none
|
||||
|
||||
- name: Checkout Nextcloud server (for signing)
|
||||
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
|
||||
with:
|
||||
repository: nextcloud/server
|
||||
ref: stable30
|
||||
path: server
|
||||
|
||||
- name: Install dependencies and build
|
||||
working-directory: ${{ env.APP_DIR }}
|
||||
run: |
|
||||
composer install --no-dev --optimize-autoloader
|
||||
npm ci
|
||||
npm run build
|
||||
|
||||
- name: Setup signing certificate
|
||||
run: |
|
||||
mkdir -p $HOME/.nextcloud/certificates
|
||||
echo "${{ secrets.APP_PRIVATE_KEY }}" > $HOME/.nextcloud/certificates/${{ env.APP_NAME }}.key
|
||||
echo "${{ secrets.APP_PUBLIC_CRT }}" > $HOME/.nextcloud/certificates/${{ env.APP_NAME }}.crt
|
||||
|
||||
- name: Build app store package
|
||||
working-directory: ${{ env.APP_DIR }}
|
||||
run: make appstore server_dir=${{ github.workspace }}/server
|
||||
|
||||
- name: Create GitHub release and attach tarball
|
||||
uses: svenstaro/upload-release-action@6b7fa9f267e90b50a19fef07b3596790bb941741 # v2
|
||||
with:
|
||||
repo_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
file: ${{ env.APP_DIR }}/build/artifacts/${{ env.APP_NAME }}.tar.gz
|
||||
asset_name: ${{ env.APP_NAME }}-${{ steps.tag.outputs.TAG }}.tar.gz
|
||||
tag: ${{ github.ref }}
|
||||
release_name: Astrolabe ${{ steps.tag.outputs.TAG }}
|
||||
prerelease: ${{ contains(steps.tag.outputs.TAG, '-alpha') || contains(steps.tag.outputs.TAG, '-beta') || contains(steps.tag.outputs.TAG, '-rc') }}
|
||||
|
||||
- name: Upload to Nextcloud App Store
|
||||
uses: R0Wi/nextcloud-appstore-push-action@9244bb5445776688cfe90fa1903ea8dff95b0c28 # v1.0.4
|
||||
with:
|
||||
app_name: ${{ env.APP_NAME }}
|
||||
appstore_token: ${{ secrets.APPSTORE_TOKEN }}
|
||||
download_url: ${{ github.server_url }}/${{ github.repository }}/releases/download/${{ github.ref_name }}/${{ env.APP_NAME }}-${{ steps.tag.outputs.TAG }}.tar.gz
|
||||
app_private_key: ${{ secrets.APP_PRIVATE_KEY }}
|
||||
nightly: ${{ contains(steps.tag.outputs.TAG, '-alpha') || contains(steps.tag.outputs.TAG, '-beta') || contains(steps.tag.outputs.TAG, '-rc') }}
|
||||
@@ -1,275 +0,0 @@
|
||||
# Consolidated CI workflow for Astroglobe Nextcloud app
|
||||
#
|
||||
# Runs on PRs that modify the astroglobe directory
|
||||
# Based on Nextcloud app skeleton workflows
|
||||
#
|
||||
# SPDX-FileCopyrightText: 2025 Nextcloud MCP Server contributors
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
name: Astroglobe CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
paths:
|
||||
- 'third_party/astroglobe/**'
|
||||
- '.github/workflows/astroglobe-ci.yml'
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
concurrency:
|
||||
group: astroglobe-ci-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
changes:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
outputs:
|
||||
frontend: ${{ steps.changes.outputs.frontend }}
|
||||
php: ${{ steps.changes.outputs.php }}
|
||||
steps:
|
||||
- uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 # v3.0.2
|
||||
id: changes
|
||||
continue-on-error: true
|
||||
with:
|
||||
filters: |
|
||||
frontend:
|
||||
- 'third_party/astroglobe/src/**'
|
||||
- 'third_party/astroglobe/package.json'
|
||||
- 'third_party/astroglobe/package-lock.json'
|
||||
- 'third_party/astroglobe/vite.config.js'
|
||||
- 'third_party/astroglobe/**/*.js'
|
||||
- 'third_party/astroglobe/**/*.ts'
|
||||
- 'third_party/astroglobe/**/*.vue'
|
||||
php:
|
||||
- 'third_party/astroglobe/lib/**'
|
||||
- 'third_party/astroglobe/appinfo/**'
|
||||
- 'third_party/astroglobe/composer.json'
|
||||
- 'third_party/astroglobe/psalm.xml'
|
||||
|
||||
# Node.js build and lint
|
||||
node-build:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.frontend != 'false'
|
||||
name: Node.js build
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Read package.json node and npm engines version
|
||||
uses: skjnldsv/read-package-engines-version-actions@06d6baf7d8f41934ab630e97d9e6c0bc9c9ac5e4 # v3
|
||||
id: versions
|
||||
with:
|
||||
path: third_party/astroglobe
|
||||
fallbackNode: '^20'
|
||||
fallbackNpm: '^10'
|
||||
|
||||
- name: Set up node ${{ steps.versions.outputs.nodeVersion }}
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: ${{ steps.versions.outputs.nodeVersion }}
|
||||
|
||||
- name: Set up npm ${{ steps.versions.outputs.npmVersion }}
|
||||
run: npm i -g 'npm@${{ steps.versions.outputs.npmVersion }}'
|
||||
|
||||
- name: Install dependencies & build
|
||||
env:
|
||||
CYPRESS_INSTALL_BINARY: 0
|
||||
PUPPETEER_SKIP_DOWNLOAD: true
|
||||
run: |
|
||||
npm ci
|
||||
npm run build --if-present
|
||||
|
||||
- name: Check webpack build changes
|
||||
run: |
|
||||
bash -c "[[ ! \"`git status --porcelain `\" ]] || (echo 'Please recompile and commit the assets' && exit 1)"
|
||||
|
||||
# ESLint
|
||||
eslint:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.frontend != 'false'
|
||||
name: ESLint
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Read package.json node and npm engines version
|
||||
uses: skjnldsv/read-package-engines-version-actions@06d6baf7d8f41934ab630e97d9e6c0bc9c9ac5e4 # v3
|
||||
id: versions
|
||||
with:
|
||||
path: third_party/astroglobe
|
||||
fallbackNode: '^20'
|
||||
fallbackNpm: '^10'
|
||||
|
||||
- name: Set up node ${{ steps.versions.outputs.nodeVersion }}
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: ${{ steps.versions.outputs.nodeVersion }}
|
||||
|
||||
- name: Set up npm ${{ steps.versions.outputs.npmVersion }}
|
||||
run: npm i -g 'npm@${{ steps.versions.outputs.npmVersion }}'
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
CYPRESS_INSTALL_BINARY: 0
|
||||
PUPPETEER_SKIP_DOWNLOAD: true
|
||||
run: npm ci
|
||||
|
||||
- name: Lint
|
||||
run: npm run lint
|
||||
|
||||
# Stylelint
|
||||
stylelint:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.frontend != 'false'
|
||||
name: Stylelint
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Read package.json node and npm engines version
|
||||
uses: skjnldsv/read-package-engines-version-actions@06d6baf7d8f41934ab630e97d9e6c0bc9c9ac5e4 # v3
|
||||
id: versions
|
||||
with:
|
||||
path: third_party/astroglobe
|
||||
fallbackNode: '^20'
|
||||
fallbackNpm: '^10'
|
||||
|
||||
- name: Set up node ${{ steps.versions.outputs.nodeVersion }}
|
||||
uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
|
||||
with:
|
||||
node-version: ${{ steps.versions.outputs.nodeVersion }}
|
||||
|
||||
- name: Set up npm ${{ steps.versions.outputs.npmVersion }}
|
||||
run: npm i -g 'npm@${{ steps.versions.outputs.npmVersion }}'
|
||||
|
||||
- name: Install dependencies
|
||||
env:
|
||||
CYPRESS_INSTALL_BINARY: 0
|
||||
PUPPETEER_SKIP_DOWNLOAD: true
|
||||
run: npm ci
|
||||
|
||||
- name: Lint
|
||||
run: npm run stylelint
|
||||
|
||||
# PHP Code Style
|
||||
php-cs:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.php != 'false'
|
||||
name: PHP CS Fixer
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Get php version
|
||||
id: versions
|
||||
uses: icewind1991/nextcloud-version-matrix@58becf3b4bb6dc6cef677b15e2fd8e7d48c0908f # v1.3.1
|
||||
with:
|
||||
filename: third_party/astroglobe/appinfo/info.xml
|
||||
|
||||
- name: Set up php${{ steps.versions.outputs.php-min }}
|
||||
uses: shivammathur/setup-php@cf4cade2721270509d5b1c766ab3549210a39a2a # v2.33.0
|
||||
with:
|
||||
php-version: ${{ steps.versions.outputs.php-min }}
|
||||
extensions: bz2, ctype, curl, dom, fileinfo, gd, iconv, intl, json, libxml, mbstring, openssl, pcntl, posix, session, simplexml, xmlreader, xmlwriter, zip, zlib, sqlite, pdo_sqlite
|
||||
coverage: none
|
||||
ini-file: development
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
composer remove nextcloud/ocp --dev || true
|
||||
composer i
|
||||
|
||||
- name: Lint
|
||||
run: composer run cs:check || ( echo 'Please run `composer run cs:fix` to format your code' && exit 1 )
|
||||
|
||||
# Psalm Static Analysis
|
||||
psalm:
|
||||
runs-on: ubuntu-latest
|
||||
needs: changes
|
||||
if: needs.changes.outputs.php != 'false'
|
||||
name: Psalm
|
||||
defaults:
|
||||
run:
|
||||
working-directory: third_party/astroglobe
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
|
||||
- name: Get php version
|
||||
id: versions
|
||||
uses: icewind1991/nextcloud-version-matrix@58becf3b4bb6dc6cef677b15e2fd8e7d48c0908f # v1.3.1
|
||||
with:
|
||||
filename: third_party/astroglobe/appinfo/info.xml
|
||||
|
||||
- name: Set up php${{ steps.versions.outputs.php-min }}
|
||||
uses: shivammathur/setup-php@cf4cade2721270509d5b1c766ab3549210a39a2a # v2.33.0
|
||||
with:
|
||||
php-version: ${{ steps.versions.outputs.php-min }}
|
||||
extensions: bz2, ctype, curl, dom, fileinfo, gd, iconv, intl, json, libxml, mbstring, openssl, pcntl, posix, session, simplexml, xmlreader, xmlwriter, zip, zlib, sqlite, pdo_sqlite
|
||||
coverage: none
|
||||
ini-file: development
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
composer remove nextcloud/ocp --dev || true
|
||||
composer i
|
||||
|
||||
- name: Get OCP version matrix
|
||||
id: ocp-versions
|
||||
uses: icewind1991/nextcloud-version-matrix@58becf3b4bb6dc6cef677b15e2fd8e7d48c0908f # v1.3.1
|
||||
with:
|
||||
filename: third_party/astroglobe/appinfo/info.xml
|
||||
|
||||
- name: Install OCP for static analysis
|
||||
run: |
|
||||
# Get first OCP version from matrix
|
||||
OCP_VERSION=$(echo '${{ steps.ocp-versions.outputs.ocp-matrix }}' | jq -r '.include[0]."ocp-version"')
|
||||
composer require --dev "nextcloud/ocp:$OCP_VERSION" --ignore-platform-reqs --with-dependencies
|
||||
|
||||
- name: Run Psalm
|
||||
run: composer run psalm -- --threads=1 --monochrome --no-progress --output-format=github
|
||||
|
||||
# Summary job
|
||||
summary:
|
||||
permissions:
|
||||
contents: none
|
||||
runs-on: ubuntu-latest
|
||||
needs: [changes, node-build, eslint, stylelint, php-cs, psalm]
|
||||
if: always()
|
||||
name: astroglobe-ci-summary
|
||||
steps:
|
||||
- name: Summary status
|
||||
run: |
|
||||
if ${{ needs.changes.outputs.frontend != 'false' && (needs.node-build.result != 'success' || needs.eslint.result != 'success' || needs.stylelint.result != 'success') }}; then
|
||||
echo "Frontend checks failed"
|
||||
exit 1
|
||||
fi
|
||||
if ${{ needs.changes.outputs.php != 'false' && (needs.php-cs.result != 'success' || needs.psalm.result != 'success') }}; then
|
||||
echo "PHP checks failed"
|
||||
exit 1
|
||||
fi
|
||||
echo "All checks passed"
|
||||
@@ -7,159 +7,26 @@ on:
|
||||
|
||||
jobs:
|
||||
bump-version:
|
||||
if: "!startsWith(github.event.head_commit.message, 'bump:') && !startsWith(github.event.head_commit.message, 'chore(release):')"
|
||||
if: "!startsWith(github.event.head_commit.message, 'bump:')"
|
||||
runs-on: ubuntu-latest
|
||||
name: "Bump version and create changelog for monorepo components"
|
||||
name: "Bump version and create changelog with commitizen"
|
||||
permissions:
|
||||
contents: write
|
||||
packages: write
|
||||
steps:
|
||||
- name: Check out
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: "${{ secrets.PERSONAL_ACCESS_TOKEN }}"
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5
|
||||
- name: Create bump and changelog
|
||||
uses: commitizen-tools/commitizen-action@bb4f1df6601e2a1a891506581b0c53acdc88e07d # 0.26.0
|
||||
with:
|
||||
python-version: '3.11'
|
||||
|
||||
- name: Install uv
|
||||
run: |
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: Configure git
|
||||
run: |
|
||||
git config user.name "github-actions[bot]"
|
||||
git config user.email "github-actions[bot]@users.noreply.github.com"
|
||||
|
||||
- name: Detect and bump component versions
|
||||
id: bump
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
# Track which components were bumped
|
||||
BUMPED_COMPONENTS=""
|
||||
|
||||
# Helper function to check for commits with specific scope since last tag
|
||||
has_commits_since_tag() {
|
||||
local tag_pattern="$1"
|
||||
local scope_pattern="$2"
|
||||
|
||||
# Get the most recent tag matching the pattern
|
||||
local last_tag=$(git tag --sort=-creatordate | grep -E "^${tag_pattern}" | head -n 1 || echo "")
|
||||
|
||||
if [ -z "$last_tag" ]; then
|
||||
# No previous tag, check all commits on master
|
||||
local commit_range="master"
|
||||
else
|
||||
# Check commits since last tag
|
||||
local commit_range="${last_tag}..HEAD"
|
||||
fi
|
||||
|
||||
# Count commits matching the scope pattern
|
||||
local commit_count=$(git log "$commit_range" --oneline --grep="^${scope_pattern}" -E | wc -l)
|
||||
|
||||
if [ "$commit_count" -gt 0 ]; then
|
||||
echo "Found $commit_count commits for scope '$scope_pattern' since $last_tag"
|
||||
return 0
|
||||
else
|
||||
echo "No commits found for scope '$scope_pattern' since $last_tag"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Bump MCP server (default - all commits except helm/astrolabe scopes)
|
||||
echo "Checking MCP server for version bump..."
|
||||
|
||||
# Get the most recent MCP tag
|
||||
last_mcp_tag=$(git tag --sort=-creatordate | grep -E "^v[0-9]" | head -n 1 || echo "")
|
||||
|
||||
if [ -z "$last_mcp_tag" ]; then
|
||||
commit_range="master"
|
||||
else
|
||||
commit_range="${last_mcp_tag}..HEAD"
|
||||
fi
|
||||
|
||||
# Count conventional commits that are NOT scoped to helm or astrolabe
|
||||
mcp_commit_count=$(git log "$commit_range" --oneline --grep="^(feat|fix|docs|refactor|perf|test|build|ci|chore)" -E | \
|
||||
{ grep -v "(helm)" || true; } | { grep -v "(astrolabe)" || true; } | wc -l)
|
||||
|
||||
if [ "$mcp_commit_count" -gt 0 ]; then
|
||||
echo "Found $mcp_commit_count commits for MCP server since $last_mcp_tag"
|
||||
echo "Bumping MCP server version..."
|
||||
./scripts/bump-mcp.sh
|
||||
BUMPED_COMPONENTS="$BUMPED_COMPONENTS mcp"
|
||||
else
|
||||
echo "No commits found for MCP server since $last_mcp_tag"
|
||||
fi
|
||||
|
||||
# Bump Helm chart (scope: helm)
|
||||
echo "Checking Helm chart for version bump..."
|
||||
if has_commits_since_tag "nextcloud-mcp-server-" "(feat|fix|docs|refactor|perf|test|build|ci|chore)\(helm\)(!)?:"; then
|
||||
echo "Bumping Helm chart version..."
|
||||
./scripts/bump-helm.sh
|
||||
BUMPED_COMPONENTS="$BUMPED_COMPONENTS helm"
|
||||
fi
|
||||
|
||||
# Bump Astrolabe (scope: astrolabe)
|
||||
echo "Checking Astrolabe for version bump..."
|
||||
if has_commits_since_tag "astrolabe-v" "(feat|fix|docs|refactor|perf|test|build|ci|chore)\(astrolabe\)(!)?:"; then
|
||||
echo "Bumping Astrolabe version..."
|
||||
./scripts/bump-astrolabe.sh
|
||||
BUMPED_COMPONENTS="$BUMPED_COMPONENTS astrolabe"
|
||||
fi
|
||||
|
||||
# Output summary
|
||||
if [ -z "$BUMPED_COMPONENTS" ]; then
|
||||
echo "No components required version bumps"
|
||||
echo "bumped=false" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "Bumped components:$BUMPED_COMPONENTS"
|
||||
echo "bumped=true" >> $GITHUB_OUTPUT
|
||||
echo "components=$BUMPED_COMPONENTS" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Push tags
|
||||
if: steps.bump.outputs.bumped == 'true'
|
||||
run: |
|
||||
git push
|
||||
git push --tags
|
||||
echo "Pushed tags for components:${{ steps.bump.outputs.components }}"
|
||||
|
||||
- name: Summary
|
||||
run: |
|
||||
if [ "${{ steps.bump.outputs.bumped }}" == "true" ]; then
|
||||
echo "## Version Bump Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "The following components were bumped:" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
for component in ${{ steps.bump.outputs.components }}; do
|
||||
case $component in
|
||||
mcp)
|
||||
tag=$(git tag --sort=-creatordate | grep -E '^v[0-9]' | head -n 1)
|
||||
echo "- **MCP Server**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
|
||||
;;
|
||||
helm)
|
||||
tag=$(git tag --sort=-creatordate | grep -E '^nextcloud-mcp-server-' | head -n 1)
|
||||
echo "- **Helm Chart**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
|
||||
;;
|
||||
astrolabe)
|
||||
tag=$(git tag --sort=-creatordate | grep -E '^astrolabe-v' | head -n 1)
|
||||
echo "- **Astrolabe**: \`$tag\`" >> $GITHUB_STEP_SUMMARY
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "Tags have been pushed and release workflows will trigger automatically." >> $GITHUB_STEP_SUMMARY
|
||||
else
|
||||
echo "## Version Bump Summary" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "✅ No version bumps required - no relevant commits found since last release." >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "The workflow completed successfully with no changes." >> $GITHUB_STEP_SUMMARY
|
||||
fi
|
||||
github_token: ${{ secrets.PERSONAL_ACCESS_TOKEN }}
|
||||
changelog_increment_filename: body.md
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@5be0e66d93ac7ed76da52eca8bb058f665c3a5fe # v2.4.2
|
||||
with:
|
||||
body_path: "body.md"
|
||||
tag_name: v${{ env.REVISION }}
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
name: Claude Code Review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, synchronize]
|
||||
# Optional: Only run on specific file changes
|
||||
# paths:
|
||||
# - "src/**/*.ts"
|
||||
# - "src/**/*.tsx"
|
||||
# - "src/**/*.js"
|
||||
# - "src/**/*.jsx"
|
||||
|
||||
jobs:
|
||||
claude-review:
|
||||
# Optional: Filter by PR author
|
||||
# if: |
|
||||
# github.event.pull_request.user.login == 'external-contributor' ||
|
||||
# github.event.pull_request.user.login == 'new-developer' ||
|
||||
# github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code Review
|
||||
id: claude-review
|
||||
uses: anthropics/claude-code-action@7145c3e0510bcdbdd29f67cc4a8c1958f1acfa2f # v1
|
||||
with:
|
||||
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
prompt: |
|
||||
REPO: ${{ github.repository }}
|
||||
PR NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
Please review this pull request and provide feedback on:
|
||||
- Code quality and best practices
|
||||
- Potential bugs or issues
|
||||
- Performance considerations
|
||||
- Security concerns
|
||||
- Test coverage
|
||||
|
||||
Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
|
||||
|
||||
Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
|
||||
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
|
||||
claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
name: Claude Code
|
||||
|
||||
on:
|
||||
issue_comment:
|
||||
types: [created]
|
||||
pull_request_review_comment:
|
||||
types: [created]
|
||||
issues:
|
||||
types: [opened, assigned]
|
||||
pull_request_review:
|
||||
types: [submitted]
|
||||
|
||||
jobs:
|
||||
claude:
|
||||
if: |
|
||||
(github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
|
||||
(github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
|
||||
(github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: read
|
||||
issues: read
|
||||
id-token: write
|
||||
actions: read # Required for Claude to read CI results on PRs
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Run Claude Code
|
||||
id: claude
|
||||
uses: anthropics/claude-code-action@7145c3e0510bcdbdd29f67cc4a8c1958f1acfa2f # v1
|
||||
with:
|
||||
claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
|
||||
|
||||
# This is an optional setting that allows Claude to read CI results on PRs
|
||||
additional_permissions: |
|
||||
actions: read
|
||||
|
||||
# Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
|
||||
# prompt: 'Update the pull request description to include a summary of changes.'
|
||||
|
||||
# Optional: Add claude_args to customize behavior and configuration
|
||||
# See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
|
||||
# or https://docs.claude.com/en/docs/claude-code/cli-reference for available options
|
||||
# claude_args: '--allowed-tools Bash(gh pr:*)'
|
||||
|
||||
@@ -2,8 +2,7 @@ name: Build and Publish Docker Image
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
tags: ["*"]
|
||||
|
||||
jobs:
|
||||
build-and-push:
|
||||
@@ -13,11 +12,11 @@ jobs:
|
||||
packages: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
|
||||
- name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # v5
|
||||
uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # v5
|
||||
with:
|
||||
# list of Docker images to use as base name for tags
|
||||
images: |
|
||||
@@ -34,7 +33,7 @@ jobs:
|
||||
type=raw,value=latest,enable={{is_default_branch}}
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3
|
||||
|
||||
- name: Log in to GitHub Container Registry
|
||||
if: github.event_name != 'pull_request'
|
||||
|
||||
@@ -4,7 +4,6 @@ on:
|
||||
push:
|
||||
tags:
|
||||
- v*
|
||||
- nextcloud-mcp-server-*
|
||||
|
||||
jobs:
|
||||
release:
|
||||
@@ -15,7 +14,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -39,8 +38,6 @@ jobs:
|
||||
|
||||
- name: Run chart-releaser
|
||||
uses: helm/chart-releaser-action@cae68fefc6b5f367a0275617c9f83181ba54714f # v1.7.0
|
||||
with:
|
||||
skip_existing: true
|
||||
env:
|
||||
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
name: RAG Evaluation
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
manual_path:
|
||||
description: 'Path to Nextcloud User Manual PDF in Nextcloud'
|
||||
required: false
|
||||
default: 'Nextcloud Manual.pdf'
|
||||
embedding_model:
|
||||
description: 'OpenAI embedding model'
|
||||
required: false
|
||||
default: 'openai/text-embedding-3-small'
|
||||
generation_model:
|
||||
description: 'OpenAI generation model'
|
||||
required: false
|
||||
default: 'openai/gpt-4o-mini'
|
||||
|
||||
jobs:
|
||||
rag-evaluation:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
permissions:
|
||||
models: read
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
|
||||
- name: Run docker compose with vector sync
|
||||
uses: hoverkraft-tech/compose-action@248470ecc5ed40d8ed3d4480d8260d77179ef579 # v2.4.2
|
||||
with:
|
||||
compose-file: |
|
||||
./docker-compose.yml
|
||||
./docker-compose.ci.yml
|
||||
up-flags: "--build"
|
||||
env:
|
||||
# Environment variables passed to docker-compose.ci.yml
|
||||
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
|
||||
OPENAI_BASE_URL: "https://models.github.ai/inference"
|
||||
OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
|
||||
OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
|
||||
VECTOR_SYNC_SCAN_INTERVAL: "5"
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
|
||||
- name: Wait for Nextcloud to be ready
|
||||
run: |
|
||||
echo "Waiting for Nextcloud..."
|
||||
max_attempts=60
|
||||
attempt=0
|
||||
until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/ocs/v2.php/apps/serverinfo/api/v1/info | grep -q "401"; do
|
||||
attempt=$((attempt + 1))
|
||||
if [ $attempt -ge $max_attempts ]; then
|
||||
echo "Service did not become ready in time."
|
||||
exit 1
|
||||
fi
|
||||
echo "Attempt $attempt/$max_attempts: Service not ready, sleeping for 5 seconds..."
|
||||
sleep 5
|
||||
done
|
||||
echo "Nextcloud is ready."
|
||||
|
||||
- name: Wait for MCP server to be ready
|
||||
run: |
|
||||
echo "Waiting for MCP server..."
|
||||
max_attempts=30
|
||||
attempt=0
|
||||
until curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8000/health/live | grep -q "200"; do
|
||||
attempt=$((attempt + 1))
|
||||
if [ $attempt -ge $max_attempts ]; then
|
||||
echo "MCP server did not become ready in time."
|
||||
exit 1
|
||||
fi
|
||||
echo "Attempt $attempt/$max_attempts: MCP not ready, sleeping for 2 seconds..."
|
||||
sleep 2
|
||||
done
|
||||
echo "MCP server is ready."
|
||||
|
||||
- name: Run RAG evaluation tests
|
||||
env:
|
||||
NEXTCLOUD_HOST: "http://localhost:8080"
|
||||
NEXTCLOUD_USERNAME: "admin"
|
||||
NEXTCLOUD_PASSWORD: "admin"
|
||||
RAG_MANUAL_PATH: ${{ inputs.manual_path }}
|
||||
OPENAI_API_KEY: ${{ secrets.GITHUB_TOKEN }}
|
||||
OPENAI_BASE_URL: "https://models.github.ai/inference"
|
||||
OPENAI_EMBEDDING_MODEL: ${{ inputs.embedding_model }}
|
||||
OPENAI_GENERATION_MODEL: ${{ inputs.generation_model }}
|
||||
run: |
|
||||
uv run pytest tests/integration/test_rag.py -v --log-cli-level=INFO --provider openai
|
||||
|
||||
- name: Capture MCP container logs
|
||||
if: always()
|
||||
run: |
|
||||
echo "=== MCP Container Logs ==="
|
||||
docker compose logs mcp --tail=500
|
||||
|
||||
- name: Upload test results
|
||||
if: always()
|
||||
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5
|
||||
with:
|
||||
name: rag-evaluation-results
|
||||
path: |
|
||||
pytest-results.xml
|
||||
retention-days: 30
|
||||
@@ -18,9 +18,9 @@ jobs:
|
||||
contents: read
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6
|
||||
uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
|
||||
- name: Install uv
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
- name: Install Python 3.11
|
||||
run: uv python install 3.11
|
||||
- name: Build
|
||||
|
||||
@@ -9,9 +9,9 @@ jobs:
|
||||
linting:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
- name: Check format
|
||||
run: |
|
||||
uv run --frozen ruff format --diff
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1
|
||||
with:
|
||||
submodules: 'true'
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
###### Required to build OIDC App ######
|
||||
|
||||
- name: Set up php 8.4
|
||||
uses: shivammathur/setup-php@44454db4f0199b8b9685a5d763dc37cbf79108e1 # v2
|
||||
uses: shivammathur/setup-php@bf6b4fbd49ca58e4608c9c89fba0b8d90bd2a39f # v2
|
||||
with:
|
||||
php-version: 8.4
|
||||
coverage: none
|
||||
@@ -48,32 +48,15 @@ jobs:
|
||||
###### Required to build OIDC App ######
|
||||
|
||||
|
||||
###### Required to build Astrolabe App ######
|
||||
|
||||
- name: Set up Node.js for Astrolabe
|
||||
uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
|
||||
with:
|
||||
node-version: '20'
|
||||
|
||||
- name: Build Astrolabe app
|
||||
run: |
|
||||
cd third_party/astrolabe
|
||||
composer install --no-dev --optimize-autoloader
|
||||
npm ci
|
||||
npm run build
|
||||
|
||||
###### Required to build Astrolabe App ######
|
||||
|
||||
|
||||
- name: Run docker compose
|
||||
uses: hoverkraft-tech/compose-action@248470ecc5ed40d8ed3d4480d8260d77179ef579 # v2.4.2
|
||||
uses: hoverkraft-tech/compose-action@3846bcd61da338e9eaaf83e7ed0234a12b099b72 # v2.4.1
|
||||
with:
|
||||
compose-file: "./docker-compose.yml"
|
||||
#compose-flags: "--profile qdrant"
|
||||
up-flags: "--build"
|
||||
|
||||
- name: Install the latest version of uv
|
||||
uses: astral-sh/setup-uv@681c641aba71e4a1c380be3ab5e12ad51f415867 # v7.1.6
|
||||
uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # v7.1.3
|
||||
|
||||
- name: Install Playwright dependencies
|
||||
run: |
|
||||
|
||||
-362
@@ -1,365 +1,3 @@
|
||||
# Changelog - MCP Server
|
||||
|
||||
All notable changes to the Nextcloud MCP Server will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [PEP 440](https://peps.python.org/pep-0440/).
|
||||
|
||||
## v0.60.0 (2025-12-26)
|
||||
|
||||
### Feat
|
||||
|
||||
- Remove URL rewriting in favor of proper nextcloud config
|
||||
- **helm**: migrate to new environment variable naming convention
|
||||
- Migrate to vue 3
|
||||
- **astrolabe**: upgrade to Vue 3 and @nextcloud/vue 9
|
||||
|
||||
### Fix
|
||||
|
||||
- **tests**: Add singleton reset fixture to prevent anyio.WouldBlock errors
|
||||
- **tests**: Fix integration test failures in qdrant, sampling, and rag tests
|
||||
- **auth**: Skip issuer validation for management API tokens
|
||||
- Use settings.enable_offline_access for env var consolidation
|
||||
- Add required config.py attributes
|
||||
- **docker**: remove overwritehost to fix container-to-container DCR
|
||||
- **deps**: update dependency @nextcloud/vue to v9
|
||||
- **deps**: update dependency vue to v3
|
||||
|
||||
### Refactor
|
||||
|
||||
- **auth**: Decouple BasicAuth and OAuth authentication strategies
|
||||
|
||||
## v0.59.1 (2025-12-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: set OIDC client env vars when using existingSecret
|
||||
- **helm**: trigger chart release workflow on helm chart tags
|
||||
|
||||
## v0.59.0 (2025-12-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: add support for multi-user BasicAuth mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: address PR #447 reviewer feedback
|
||||
- **helm**: include MCP server version bumps in changelog pattern
|
||||
|
||||
## v0.58.0 (2025-12-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- **config**: enable DCR for multi-user BasicAuth with offline access
|
||||
- **astrolabe**: implement app password provisioning for multi-user background sync
|
||||
- **config**: consolidate configuration with smart dependency resolution (ADR-021)
|
||||
|
||||
## v0.57.0 (2025-12-20)
|
||||
|
||||
### Feat
|
||||
|
||||
- **auth**: add multi-user BasicAuth pass-through mode
|
||||
- **astrolabe**: add dynamic MCP server configuration for testing
|
||||
|
||||
### Fix
|
||||
|
||||
- **config**: address reviewer feedback
|
||||
|
||||
### Refactor
|
||||
|
||||
- **config**: centralize configuration validation and simplify startup
|
||||
|
||||
## v0.56.2 (2025-12-20)
|
||||
|
||||
### Fix
|
||||
|
||||
- **astrolabe**: screenshots in info.xml
|
||||
- **astrolabe**: screenshots in info.xml
|
||||
|
||||
## v0.56.1 (2025-12-19)
|
||||
|
||||
### Fix
|
||||
|
||||
- **astrolabe**: Update screenshots
|
||||
- **ci**: skip existing Helm chart releases to prevent duplicate release errors
|
||||
|
||||
## v0.56.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- **ci**: add --increment flag to bump scripts for manual version control
|
||||
|
||||
### Fix
|
||||
|
||||
- **astrolabe**: add contents:write permission to appstore workflow
|
||||
- **astrolabe**: update commitizen pattern to properly update info.xml version
|
||||
- **astrolabe**: prevent workflow failure when only helm/astrolabe commits exist
|
||||
- **astrolabe**: info.xml
|
||||
|
||||
## v0.55.1 (2025-12-19)
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: push all tags explicitly in bump workflow
|
||||
|
||||
## v0.55.0 (2025-12-19)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- MCP server now bumps for ANY conventional commit except
|
||||
those explicitly scoped to helm or astrolabe.
|
||||
|
||||
### Feat
|
||||
|
||||
- **ci**: implement monorepo-aware version bumping workflow
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: make MCP server default bump target for all non-scoped commits
|
||||
- **ci**: restrict docker build to MCP server tags only
|
||||
- **ci**: correct appstore-push-action version to v1.0.4
|
||||
|
||||
## v0.54.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- **astrolabe**: add Nextcloud App Store deployment automation
|
||||
- configure commitizen monorepo with independent versioning
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: improve versioning and error handling
|
||||
- **ci**: address critical workflow and validation issues
|
||||
- **astrolabe**: address code review feedback
|
||||
|
||||
## v0.53.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- add Alembic database migration system
|
||||
- make chunk modal title clickable link to documents
|
||||
- add native Plotly hover styling for clickable points
|
||||
- add click interactivity to Plotly 3D scatter chart
|
||||
- improve chunk viewer with fixed navigation and markdown rendering
|
||||
- **astrolabe**: enable multi-select for document types and refactor PDF viewer
|
||||
- **auth**: implement refresh token rotation for Nextcloud OIDC
|
||||
- **astrolabe**: enhance unified search and add webhook management
|
||||
- **astrolabe**: add webhook management UI to admin settings
|
||||
- **astrolabe**: add OAuth token refresh and webhook presets
|
||||
- **search**: add file_path metadata and chunk offsets to search results
|
||||
- **astrolabe**: use proper icons and thumbnails in unified search
|
||||
- **astrolabe**: add admin search settings and enhanced UI
|
||||
- **astrolabe**: add unified search provider with clickable file links
|
||||
- **astrolabe**: add 3D PCA visualization for semantic search
|
||||
- **astrolabe**: add Nextcloud PHP app for MCP server management
|
||||
- **vector-sync**: enable background sync in OAuth mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **security**: address critical security issues from PR #401 code review
|
||||
- **oauth**: enable PKCE for all clients and add token_broker to oauth_context
|
||||
- **astrolabe**: revert invalid files_pdfviewer URL for file links
|
||||
- resolve type checking warnings for CI
|
||||
- move Alembic to package submodule for Docker compatibility
|
||||
- update unified search results to match chunk viz display
|
||||
- **astrolabe**: handle OAuth refresh token rotation
|
||||
- address critical code review issues (4 fixes)
|
||||
- resolve CI linting issues for Astroglobe
|
||||
|
||||
### Refactor
|
||||
|
||||
- **astrolabe**: extract PDF viewer to dedicated component
|
||||
- **astrolabe**: reframe UI as semantic search service
|
||||
|
||||
## v0.52.1 (2025-12-13)
|
||||
|
||||
### Perf
|
||||
|
||||
- **deck**: optimize card lookup by storing board_id/stack_id in metadata
|
||||
|
||||
## v0.52.0 (2025-12-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- **vector**: add Deck card vector search with visualization support
|
||||
|
||||
## v0.51.0 (2025-12-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- **vector-viz**: add news_item support for links and chunk expansion
|
||||
|
||||
## v0.50.2 (2025-12-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- **news**: revert get_item() to use get_items() + filter
|
||||
|
||||
## v0.50.1 (2025-12-12)
|
||||
|
||||
### Fix
|
||||
|
||||
- Disable DNS rebinding protection for containerized deployments
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
## v0.50.0 (2025-12-11)
|
||||
|
||||
### Feat
|
||||
|
||||
- add MCP tool annotations for enhanced UX
|
||||
|
||||
### Fix
|
||||
|
||||
- address PR review feedback
|
||||
|
||||
## v0.49.2 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update lockfile
|
||||
|
||||
## v0.49.1 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Revert mcp version <1.23
|
||||
|
||||
## v0.49.0 (2025-12-08)
|
||||
|
||||
### Feat
|
||||
|
||||
- **news**: add Nextcloud News app integration
|
||||
|
||||
### Fix
|
||||
|
||||
- resolve all type checking errors (8 errors fixed)
|
||||
|
||||
### Refactor
|
||||
|
||||
- **news**: simplify vector sync to fetch all items
|
||||
|
||||
### Perf
|
||||
|
||||
- **news**: use direct API endpoint for get_item()
|
||||
|
||||
## v0.48.6 (2025-12-03)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
## v0.48.5 (2025-11-28)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency pillow to v12
|
||||
|
||||
## v0.48.4 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Add rate limit retry logic to OpenAI provider
|
||||
|
||||
## v0.48.3 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Increase MCP sampling timeout to 5 minutes for slower LLMs
|
||||
|
||||
## v0.48.2 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
|
||||
## v0.48.1 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Use WebDAV for tag creation and add LLM-as-a-judge for RAG tests
|
||||
|
||||
### Refactor
|
||||
|
||||
- Move background tasks to server lifespan and deprecate SSE transport
|
||||
|
||||
## v0.48.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add tag management methods to WebDAV client
|
||||
|
||||
## v0.47.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add OpenAI provider support for embeddings and generation
|
||||
|
||||
## v0.46.2 (2025-11-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Enable JSON response format for scanner compatibility
|
||||
|
||||
## v0.46.1 (2025-11-22)
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize vector viz search performance
|
||||
|
||||
## v0.46.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add Smithery CLI deployment support
|
||||
- Implement ADR-016 Smithery stateless deployment mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
|
||||
- **smithery**: Use container runtime pattern for config discovery
|
||||
- Add Smithery lifespan and auth mode detection
|
||||
|
||||
## v0.45.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add context expansion to semantic search with chunk overlap removal
|
||||
- Use Ollama native batch API in embed_batch()
|
||||
- Implement Qdrant placeholder state management
|
||||
- Switch files to use numeric IDs with file_path resolution
|
||||
- Implement per-chunk vector visualization with context expansion
|
||||
|
||||
### Fix
|
||||
|
||||
- Use alpha_composite for proper RGBA highlight blending
|
||||
- Remove pymupdf.layout.activate() to fix page_chunks behavior
|
||||
- Centralize PDF processing and generate separate images per chunk
|
||||
- Set is_placeholder=False in processor to fix search filtering
|
||||
- Increase placeholder staleness threshold to 5x scan interval
|
||||
- Add placeholder staleness check to prevent duplicate processing
|
||||
- Use empty SparseVector instead of None for placeholders
|
||||
- Return empty array instead of null for query_coords when no results
|
||||
- Align PDF text extraction between indexing and context expansion
|
||||
- Update models and viz to use int-only doc_id
|
||||
- Reconstruct full content for notes to match indexed offsets
|
||||
- Add async/await, PDF metadata, and type safety fixes
|
||||
|
||||
### Refactor
|
||||
|
||||
- Simplify PDF text extraction with single to_markdown call
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize PDF processing with parallel extraction and single-render highlights
|
||||
|
||||
## v0.44.1 (2025-11-21)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.22,<1.23
|
||||
|
||||
## v0.44.0 (2025-11-19)
|
||||
|
||||
### Feat
|
||||
|
||||
@@ -56,68 +56,6 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
||||
- Pass-through (default): Simple, stateless (ENABLE_TOKEN_EXCHANGE=false)
|
||||
- Token exchange (opt-in): RFC 8693 delegation (ENABLE_TOKEN_EXCHANGE=true)
|
||||
|
||||
### MCP Tool Annotations (ADR-017)
|
||||
|
||||
**All tools MUST include annotations** following these patterns:
|
||||
|
||||
```python
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
# Read-only tools (list, search, get)
|
||||
@mcp.tool(
|
||||
title="Human Readable Name",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True, # Nextcloud is external to MCP server
|
||||
),
|
||||
)
|
||||
|
||||
# Create operations
|
||||
@mcp.tool(
|
||||
title="Create Resource",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Creates new resources each time
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Update operations (with etag/version control)
|
||||
@mcp.tool(
|
||||
title="Update Resource",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # ETag changes = different inputs
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
|
||||
# Delete operations
|
||||
@mcp.tool(
|
||||
title="Delete Resource",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, # Permanently deletes data
|
||||
idempotentHint=True, # Same end state if called repeatedly
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
|
||||
# HTTP PUT without version control (special case)
|
||||
@mcp.tool(
|
||||
title="Write File",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=True, # Same content = same end state
|
||||
openWorldHint=True,
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
**Key Principles**:
|
||||
- **Idempotency**: Same inputs → same result. ETags change after updates, making them non-idempotent
|
||||
- **Destructive**: Operations that permanently delete/overwrite data
|
||||
- **Open World**: All Nextcloud tools access external service (openWorldHint=True)
|
||||
- **Titles**: Use human-readable names, not snake_case function names
|
||||
|
||||
**See**: `docs/ADR-017-mcp-tool-annotations.md` for detailed rationale and examples
|
||||
|
||||
### Project Structure
|
||||
- `nextcloud_mcp_server/client/` - HTTP clients for Nextcloud APIs
|
||||
- `nextcloud_mcp_server/server/` - MCP tool/resource definitions
|
||||
@@ -506,29 +444,6 @@ docker compose exec app php occ user_oidc:provider keycloak
|
||||
**Nextcloud**: `docker compose exec app php occ ...` for occ commands
|
||||
**MariaDB**: `docker compose exec db mariadb -u [user] -p [password] [database]` for queries
|
||||
|
||||
### Querying Nextcloud Application Logs
|
||||
|
||||
**Use this pattern** to inspect Nextcloud application logs during debugging:
|
||||
|
||||
```bash
|
||||
# View recent log entries
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq | tail
|
||||
|
||||
# Filter by app
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.app == "astrolabe")' | tail
|
||||
|
||||
# Filter by log level (0=DEBUG, 1=INFO, 2=WARN, 3=ERROR, 4=FATAL)
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.level >= 3)' | tail
|
||||
|
||||
# Search for specific messages
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.message | contains("OAuth"))' | tail -20
|
||||
|
||||
# View full exception traces
|
||||
docker compose exec app cat /var/www/html/data/nextcloud.log | jq 'select(.exception != null)' | tail -5
|
||||
```
|
||||
|
||||
**Log Structure**: Each entry is a JSON object with fields: `reqId`, `level`, `time`, `remoteAddr`, `user`, `app`, `method`, `url`, `message`, `userAgent`, `version`, `exception`
|
||||
|
||||
**For detailed setup, see**:
|
||||
- `docs/installation.md` - Installation guide
|
||||
- `docs/configuration.md` - Configuration options
|
||||
|
||||
-116
@@ -1,116 +0,0 @@
|
||||
# Contributing to Nextcloud MCP Server
|
||||
|
||||
## Version Management
|
||||
|
||||
This monorepo uses commitizen for version management with **independent versioning** for three components:
|
||||
|
||||
### Components
|
||||
|
||||
| Component | Scope | Bump Command | Tag Example |
|
||||
|-----------|-------|--------------|-------------|
|
||||
| MCP Server | `mcp` or none | `./scripts/bump-mcp.sh` | `v0.54.0` |
|
||||
| Helm Chart | `helm` | `./scripts/bump-helm.sh` | `nextcloud-mcp-server-0.54.0` |
|
||||
| Astrolabe App | `astrolabe` | `./scripts/bump-astrolabe.sh` | `astrolabe-v0.2.0` |
|
||||
|
||||
### Commit Message Format
|
||||
|
||||
Use conventional commits with **scopes** to target specific components:
|
||||
|
||||
```bash
|
||||
# MCP server changes
|
||||
feat(mcp): add calendar sync API
|
||||
fix(mcp): resolve authentication bug
|
||||
|
||||
# Helm chart changes
|
||||
feat(helm): add resource limits
|
||||
docs(helm): update values documentation
|
||||
|
||||
# Astrolabe app changes
|
||||
feat(astrolabe): add dark mode toggle
|
||||
fix(astrolabe): resolve search UI bug
|
||||
```
|
||||
|
||||
**Unscoped commits** default to the MCP server:
|
||||
```bash
|
||||
feat: add new feature # → MCP server (v0.54.0)
|
||||
```
|
||||
|
||||
### Release Workflow
|
||||
|
||||
#### 1. Make Changes with Scoped Commits
|
||||
|
||||
```bash
|
||||
git commit -m "feat(astrolabe): add dark mode toggle"
|
||||
git commit -m "feat(helm): add ingress annotations"
|
||||
git commit -m "feat(mcp): add calendar sync"
|
||||
```
|
||||
|
||||
#### 2. Bump Component Versions
|
||||
|
||||
```bash
|
||||
# Bump MCP server (reads commits with scope=mcp or unscoped)
|
||||
./scripts/bump-mcp.sh
|
||||
# → Creates tag: v0.54.0
|
||||
# → Updates: pyproject.toml, Chart.yaml:appVersion
|
||||
|
||||
# Bump Helm chart (reads commits with scope=helm)
|
||||
./scripts/bump-helm.sh
|
||||
# → Creates tag: nextcloud-mcp-server-0.54.0
|
||||
# → Updates: Chart.yaml:version
|
||||
|
||||
# Bump Astrolabe (reads commits with scope=astrolabe)
|
||||
./scripts/bump-astrolabe.sh
|
||||
# → Creates tag: astrolabe-v0.2.0
|
||||
# → Updates: info.xml, package.json
|
||||
```
|
||||
|
||||
#### 3. Push Tags
|
||||
|
||||
```bash
|
||||
git push --follow-tags
|
||||
```
|
||||
|
||||
### Changelog Filtering
|
||||
|
||||
Each component maintains its own `CHANGELOG.md`:
|
||||
|
||||
- **MCP Server**: `CHANGELOG.md` (root) - includes `feat(mcp):` and unscoped commits
|
||||
- **Helm Chart**: `charts/nextcloud-mcp-server/CHANGELOG.md` - includes `feat(helm):` only
|
||||
- **Astrolabe**: `third_party/astrolabe/CHANGELOG.md` - includes `feat(astrolabe):` only
|
||||
|
||||
### Manual Version Bumps
|
||||
|
||||
For specific increments:
|
||||
|
||||
```bash
|
||||
# Patch bump (0.53.0 → 0.53.1)
|
||||
uv run cz bump --increment PATCH
|
||||
|
||||
# Minor bump (0.53.0 → 0.54.0)
|
||||
uv run cz bump --increment MINOR
|
||||
|
||||
# Major bump (0.53.0 → 1.0.0)
|
||||
uv run cz bump --increment MAJOR
|
||||
|
||||
# For non-MCP components, use --config
|
||||
cd charts/nextcloud-mcp-server
|
||||
uv run cz --config .cz.toml bump --increment MINOR
|
||||
```
|
||||
|
||||
### Versioning Philosophy
|
||||
|
||||
- **MCP Server**: Follows PEP 440, `major_version_zero = true` (0.x.x for pre-1.0)
|
||||
- **Helm Chart**: Follows PEP 440, starts at 0.53.0 (continues from current)
|
||||
- **Astrolabe**: Follows PEP 440, `major_version_zero = true` (0.x.x for alpha/beta)
|
||||
|
||||
### Chart.yaml Version vs appVersion
|
||||
|
||||
The Helm chart has TWO version fields:
|
||||
|
||||
- **`version`**: Chart packaging version (bumped by `feat(helm):`)
|
||||
- Example: `0.53.0` → `0.54.0` when adding resource limits
|
||||
|
||||
- **`appVersion`**: MCP server version being deployed (bumped by `feat(mcp):`)
|
||||
- Example: `"0.53.0"` → `"0.54.0"` when MCP server releases
|
||||
|
||||
This allows the chart to evolve independently from the application.
|
||||
+3
-10
@@ -1,28 +1,21 @@
|
||||
FROM docker.io/library/python:3.12-slim-trixie@sha256:fa48eefe2146644c2308b909d6bb7651a768178f84fc9550dcd495e4d6d84d01
|
||||
FROM docker.io/library/python:3.12-slim-trixie@sha256:2e683fc3e18a248aa23b8022f2a3474b072b04fb851efe9b49f6b516a8944939
|
||||
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.18@sha256:5713fa8217f92b80223bc83aac7db36ec80a84437dbc0d04bbc659cae030d8c9 /uv /uvx /bin/
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.10@sha256:29bd45092ea8902c0bbb7f0a338f0494a382b1f4b18355df5be270ade679ff1d /uv /uvx /bin/
|
||||
|
||||
# Install dependencies
|
||||
# 1. git (required for caldav dependency from git)
|
||||
# 2. sqlite for development with token db
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
|
||||
git \
|
||||
tesseract-ocr \
|
||||
sqlite3 && apt clean
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY pyproject.toml uv.lock README.md .
|
||||
|
||||
RUN uv sync --locked --no-dev --no-install-project --no-cache
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN uv sync --locked --no-dev --no-editable --no-cache
|
||||
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
ENV VIRTUAL_ENV=/app/.venv
|
||||
ENV PATH=/app/.venv/bin:$PATH
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata
|
||||
|
||||
ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "run", "--host", "0.0.0.0"]
|
||||
ENTRYPOINT ["/app/.venv/bin/nextcloud-mcp-server", "--host", "0.0.0.0"]
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
# Dockerfile for Smithery stateless deployment
|
||||
# ADR-016: Stateless mode for multi-user public Nextcloud instances
|
||||
#
|
||||
# This image excludes:
|
||||
# - Vector database dependencies (qdrant-client)
|
||||
# - Background sync workers
|
||||
# - Admin UI routes (/app)
|
||||
# - Semantic search tools
|
||||
#
|
||||
# Features included:
|
||||
# - Core Nextcloud tools (notes, calendar, contacts, files, deck, tables, cookbook)
|
||||
# - Per-session app password authentication
|
||||
# - Multi-user support via Smithery session config
|
||||
|
||||
FROM docker.io/library/python:3.12-slim-trixie@sha256:fa48eefe2146644c2308b909d6bb7651a768178f84fc9550dcd495e4d6d84d01
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install uv for fast dependency management
|
||||
COPY --from=ghcr.io/astral-sh/uv:0.9.18@sha256:5713fa8217f92b80223bc83aac7db36ec80a84437dbc0d04bbc659cae030d8c9 /uv /uvx /bin/
|
||||
|
||||
# Install dependencies
|
||||
# 1. git (required for caldav dependency from git)
|
||||
# 2. sqlite for development with token db
|
||||
RUN apt update && apt install --no-install-recommends --no-install-suggests -y \
|
||||
git
|
||||
|
||||
# Copy project files
|
||||
COPY . .
|
||||
|
||||
RUN uv sync --locked --no-dev --no-editable --no-cache
|
||||
|
||||
# Set Smithery mode environment variables
|
||||
ENV SMITHERY_DEPLOYMENT=true
|
||||
ENV VECTOR_SYNC_ENABLED=false
|
||||
|
||||
# Smithery sets PORT=8081 by default
|
||||
EXPOSE 8081
|
||||
|
||||
# Health check endpoint
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
||||
CMD uv run python -c "import httpx; httpx.get('http://localhost:${PORT:-8081}/health/live').raise_for_status()"
|
||||
|
||||
CMD ["/app/.venv/bin/smithery-main"]
|
||||
@@ -5,7 +5,6 @@
|
||||
# Nextcloud MCP Server
|
||||
|
||||
[](https://github.com/cbcoutinho/nextcloud-mcp-server/pkgs/container/nextcloud-mcp-server)
|
||||
[](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
|
||||
|
||||
**A production-ready MCP server that connects AI assistants to your Nextcloud instance.**
|
||||
|
||||
@@ -18,20 +17,7 @@ This is a **dedicated standalone MCP server** designed for external MCP clients
|
||||
|
||||
## Quick Start
|
||||
|
||||
The fastest way to get started is via [Smithery](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server) - no Docker or self-hosting required:
|
||||
|
||||
1. Visit the [Smithery marketplace page](https://smithery.ai/server/@cbcoutinho/nextcloud-mcp-server)
|
||||
2. Click "Deploy" and configure:
|
||||
- **Nextcloud URL**: Your Nextcloud instance (e.g., `https://cloud.example.com`)
|
||||
- **Username**: Your Nextcloud username
|
||||
- **App Password**: Generate one in Nextcloud → Settings → Security → Devices & sessions
|
||||
|
||||
> [!NOTE]
|
||||
> Smithery runs in stateless mode without semantic search. For full features, use [Docker](#docker-self-hosted) or see [ADR-016](docs/ADR-016-smithery-stateless-deployment.md).
|
||||
|
||||
## Docker (Self-Hosted)
|
||||
|
||||
For full features including semantic search, run with Docker:
|
||||
Get up and running in 60 seconds using Docker:
|
||||
|
||||
```bash
|
||||
# 1. Create a minimal configuration
|
||||
@@ -51,11 +37,12 @@ curl http://127.0.0.1:8000/health/ready
|
||||
# 4. Connect to the endpoint
|
||||
http://127.0.0.1:8000/sse
|
||||
|
||||
# Or with --transport streamable-http
|
||||
# 4. Or with --transport streamable-http
|
||||
http://127.0.0.1:8000/mcp
|
||||
```
|
||||
|
||||
**Next Steps:**
|
||||
- Create an app password in Nextcloud: Settings → Security → Devices & sessions
|
||||
- Connect your MCP client (Claude Desktop, IDEs, `mcp dev`, etc.)
|
||||
- See [docs/installation.md](docs/installation.md) for other deployment options (local, Kubernetes)
|
||||
|
||||
@@ -63,7 +50,7 @@ http://127.0.0.1:8000/mcp
|
||||
|
||||
- **90+ MCP Tools** - Comprehensive API coverage across 8 Nextcloud apps
|
||||
- **MCP Resources** - Structured data URIs for browsing Nextcloud data
|
||||
- **Semantic Search (Experimental)** - Optional vector-powered search for Notes, Files, News items, and Deck cards (requires Qdrant + Ollama)
|
||||
- **Semantic Search (Experimental)** - Optional vector-powered search for Notes (requires Qdrant + Ollama)
|
||||
- **Document Processing** - OCR and text extraction from PDFs, DOCX, images with progress notifications
|
||||
- **Flexible Deployment** - Docker, Kubernetes (Helm), VM, or local installation
|
||||
- **Production-Ready Auth** - Basic Auth with app passwords (recommended) or OAuth2/OIDC (experimental)
|
||||
@@ -81,7 +68,7 @@ http://127.0.0.1:8000/mcp
|
||||
| **Cookbook** | 13 | Recipe management, URL import (schema.org) |
|
||||
| **Tables** | 5 | Row operations on Nextcloud Tables |
|
||||
| **Sharing** | 10+ | Create and manage shares |
|
||||
| **Semantic Search** | 2+ | Vector search for Notes, Files, News items, and Deck cards (experimental, opt-in, requires infrastructure) |
|
||||
| **Semantic Search** | 2+ | Vector search for Notes (experimental, opt-in, requires infrastructure) |
|
||||
|
||||
Want to see another Nextcloud app supported? [Open an issue](https://github.com/cbcoutinho/nextcloud-mcp-server/issues) or contribute a pull request!
|
||||
|
||||
@@ -99,7 +86,7 @@ Want to see another Nextcloud app supported? [Open an issue](https://github.com/
|
||||
|
||||
### Authentication Modes
|
||||
|
||||
The server supports three authentication modes:
|
||||
The server supports two authentication modes:
|
||||
|
||||
**Single-User Mode (BasicAuth):**
|
||||
- One set of credentials shared by all MCP clients
|
||||
@@ -113,12 +100,6 @@ The server supports three authentication modes:
|
||||
- More secure: tokens expire, credentials never shared with server
|
||||
- Best for: Teams, multi-user deployments, production environments with multiple users
|
||||
|
||||
**Hybrid Mode (Multi-User BasicAuth + OAuth):**
|
||||
- MCP clients use BasicAuth (simple, stateless)
|
||||
- Admin operations use OAuth (webhooks, background sync)
|
||||
- Best for: Nextcloud deployments with admin-managed webhooks and semantic search
|
||||
- Requires: `ENABLE_MULTI_USER_BASIC_AUTH=true` + `ENABLE_OFFLINE_ACCESS=true`
|
||||
|
||||
See [docs/authentication.md](docs/authentication.md) for detailed setup instructions.
|
||||
|
||||
## Semantic Search
|
||||
@@ -133,7 +114,7 @@ This enables natural language queries and helps discover related content across
|
||||
|
||||
> [!NOTE]
|
||||
> **Semantic Search is experimental and opt-in:**
|
||||
> - Disabled by default (`ENABLE_SEMANTIC_SEARCH=false`)
|
||||
> - Disabled by default (`VECTOR_SYNC_ENABLED=false`)
|
||||
> - Currently supports Notes app only (multi-app support planned)
|
||||
> - Requires additional infrastructure: vector database + embedding service
|
||||
> - Answer generation (`nc_semantic_search_answer`) requires MCP client sampling support
|
||||
@@ -151,7 +132,7 @@ This enables natural language queries and helps discover related content across
|
||||
### Features
|
||||
- **[App Documentation](docs/)** - Notes, Calendar, Contacts, WebDAV, Deck, Cookbook, Tables
|
||||
- **[Document Processing](docs/configuration.md#document-processing)** - OCR and text extraction setup
|
||||
- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes, Files, News items, Deck cards; opt-in)
|
||||
- **[Semantic Search Architecture](docs/semantic-search-architecture.md)** - Experimental vector search (Notes only, opt-in)
|
||||
- **[Vector Sync UI Guide](docs/user-guide/vector-sync-ui.md)** - Browser interface for semantic search visualization and testing
|
||||
|
||||
### Advanced Topics
|
||||
|
||||
-90
@@ -1,90 +0,0 @@
|
||||
# Alembic configuration file for nextcloud-mcp-server
|
||||
|
||||
[alembic]
|
||||
# Path to migration scripts
|
||||
script_location = nextcloud_mcp_server/alembic
|
||||
|
||||
# Template used to generate migration file names
|
||||
# Default: %%(rev)s_%%(slug)s
|
||||
file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d_%%(rev)s_%%(slug)s
|
||||
|
||||
# Timezone for migration timestamps
|
||||
# Default: utc
|
||||
timezone = utc
|
||||
|
||||
# Max length of characters to apply to the "slug" field
|
||||
# Default: 40
|
||||
# truncate_slug_length = 40
|
||||
|
||||
# Set to 'true' to run the environment during the 'revision' command
|
||||
# Default: false
|
||||
# revision_environment = false
|
||||
|
||||
# Set to 'true' to allow .pyc and .pyo files without a source .py file
|
||||
# Default: false
|
||||
# sourceless = false
|
||||
|
||||
# Version location specification
|
||||
# Supports single or multiple directories
|
||||
version_locations = nextcloud_mcp_server/alembic/versions
|
||||
|
||||
# Path separator for version locations (required to suppress deprecation warning)
|
||||
# Use os (for cross-platform compatibility)
|
||||
path_separator = os
|
||||
|
||||
# Set to 'true' to search source files recursively in each "version_locations" directory
|
||||
# Default: false
|
||||
# recursive_version_locations = false
|
||||
|
||||
# Output encoding used when revision files are written
|
||||
# Default: utf-8
|
||||
# output_encoding = utf-8
|
||||
|
||||
# Database URL - can be overridden by:
|
||||
# 1. Passing -x database_url=... to alembic commands
|
||||
# 2. Setting in environment via get_database_url() in env.py
|
||||
# Default: sqlite:///app/data/tokens.db
|
||||
sqlalchemy.url = sqlite+aiosqlite:////app/data/tokens.db
|
||||
|
||||
[post_write_hooks]
|
||||
# Post-write hooks allow you to run scripts after generating migration files
|
||||
# Example: format migrations with ruff
|
||||
# hooks = ruff
|
||||
# ruff.type = exec
|
||||
# ruff.executable = ruff
|
||||
# ruff.options = format REVISION_SCRIPT_FILENAME
|
||||
|
||||
# Logging configuration
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@@ -1,71 +0,0 @@
|
||||
Database Migrations for nextcloud-mcp-server
|
||||
============================================
|
||||
|
||||
This directory contains Alembic database migrations for the token storage database.
|
||||
|
||||
Structure
|
||||
---------
|
||||
- env.py: Alembic environment configuration
|
||||
- script.py.mako: Template for generating new migration files
|
||||
- versions/: Directory containing migration scripts
|
||||
|
||||
Usage
|
||||
-----
|
||||
Migrations are managed via the CLI:
|
||||
|
||||
# Upgrade database to latest version
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Show current database version
|
||||
uv run nextcloud-mcp-server db current
|
||||
|
||||
# Show migration history
|
||||
uv run nextcloud-mcp-server db history
|
||||
|
||||
# Create a new migration (developers only)
|
||||
uv run nextcloud-mcp-server db migrate "description of changes"
|
||||
|
||||
# Downgrade database by one version (emergency use only)
|
||||
uv run nextcloud-mcp-server db downgrade
|
||||
|
||||
Direct Alembic Usage
|
||||
--------------------
|
||||
You can also use Alembic commands directly:
|
||||
|
||||
# Specify database URL via -x flag
|
||||
uv run alembic -x database_url=sqlite+aiosqlite:////path/to/tokens.db upgrade head
|
||||
|
||||
# Or set in alembic.ini and run
|
||||
uv run alembic upgrade head
|
||||
uv run alembic current
|
||||
uv run alembic history
|
||||
|
||||
Writing Migrations
|
||||
------------------
|
||||
Since we don't use SQLAlchemy models, migrations are written with raw SQL:
|
||||
|
||||
def upgrade() -> None:
|
||||
op.execute("""
|
||||
ALTER TABLE refresh_tokens
|
||||
ADD COLUMN new_field TEXT
|
||||
""")
|
||||
|
||||
def downgrade() -> None:
|
||||
# SQLite doesn't support DROP COLUMN, use table recreation
|
||||
op.execute("""
|
||||
CREATE TABLE refresh_tokens_new AS
|
||||
SELECT user_id, encrypted_token, ... FROM refresh_tokens
|
||||
""")
|
||||
op.execute("DROP TABLE refresh_tokens")
|
||||
op.execute("ALTER TABLE refresh_tokens_new RENAME TO refresh_tokens")
|
||||
|
||||
Migration File Naming
|
||||
---------------------
|
||||
Format: YYYYMMDD_HHMM_<revision>_<slug>.py
|
||||
Example: 20251217_2200_001_initial_schema.py
|
||||
|
||||
Notes
|
||||
-----
|
||||
- Migrations run automatically when RefreshTokenStorage.initialize() is called
|
||||
- Existing databases are automatically stamped with the initial version
|
||||
- SQLite has limited ALTER TABLE support - complex changes require table recreation
|
||||
@@ -1,26 +0,0 @@
|
||||
"""${message}
|
||||
|
||||
Revision ID: ${up_revision}
|
||||
Revises: ${down_revision | comma,n}
|
||||
Create Date: ${create_date}
|
||||
|
||||
"""
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
${imports if imports else ""}
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = ${repr(up_revision)}
|
||||
down_revision = ${repr(down_revision)}
|
||||
branch_labels = ${repr(branch_labels)}
|
||||
depends_on = ${repr(depends_on)}
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Apply migration changes to upgrade the database schema."""
|
||||
${upgrades if upgrades else "pass"}
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Revert migration changes to downgrade the database schema."""
|
||||
${downgrades if downgrades else "pass"}
|
||||
@@ -3,9 +3,3 @@
|
||||
set -euox pipefail
|
||||
|
||||
php /var/www/html/occ config:system:set trusted_domains 2 --value=host.docker.internal
|
||||
|
||||
# Set overwrite.cli.url to the external URL for OIDC discovery
|
||||
# This ensures OAuth flows redirect to the correct external URL
|
||||
# Important: The Astrolabe OAuth controller makes internal HTTP requests to /.well-known/openid-configuration
|
||||
# which needs to return URLs reachable by external browsers (localhost:8080, not localhost:80)
|
||||
php /var/www/html/occ config:system:set overwrite.cli.url --value="http://localhost:8080"
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euox pipefail
|
||||
|
||||
php /var/www/html/occ app:enable news
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -euox pipefail
|
||||
|
||||
echo "Installing Astrolabe app for testing..."
|
||||
|
||||
# Check if development astrolabe app is mounted at /opt/apps/astrolabe
|
||||
if [ -d /opt/apps/astrolabe ]; then
|
||||
echo "Development astrolabe app found at /opt/apps/astrolabe"
|
||||
|
||||
# Remove any existing astrolabe app in custom_apps (from app store or old symlink)
|
||||
if [ -e /var/www/html/custom_apps/astrolabe ]; then
|
||||
echo "Removing existing astrolabe in custom_apps..."
|
||||
rm -rf /var/www/html/custom_apps/astrolabe
|
||||
fi
|
||||
|
||||
# Create symlink from custom_apps to the mounted development version
|
||||
# Per Nextcloud docs: apps outside server root need symlinks in server root
|
||||
echo "Creating symlink: custom_apps/astrolabe -> /opt/apps/astrolabe"
|
||||
ln -sf /opt/apps/astrolabe /var/www/html/custom_apps/astrolabe
|
||||
|
||||
echo "Enabling astrolabe app from /opt/apps (development mode via symlink)"
|
||||
php /var/www/html/occ app:enable astrolabe
|
||||
elif [ -d /var/www/html/custom_apps/astrolabe ]; then
|
||||
echo "astrolabe app directory found in custom_apps (already installed)"
|
||||
php /var/www/html/occ app:enable astrolabe
|
||||
else
|
||||
echo "astrolabe app not found, installing from app store..."
|
||||
php /var/www/html/occ app:install astrolabe
|
||||
php /var/www/html/occ app:enable astrolabe
|
||||
fi
|
||||
|
||||
echo "✓ Astrolabe app installed successfully"
|
||||
echo ""
|
||||
echo "Note: MCP server configuration is managed dynamically during tests"
|
||||
echo " to support testing multiple MCP server deployments."
|
||||
@@ -1,25 +0,0 @@
|
||||
[tool.commitizen]
|
||||
name = "cz_conventional_commits"
|
||||
version = "0.56.0"
|
||||
tag_format = "nextcloud-mcp-server-$version"
|
||||
version_scheme = "semver"
|
||||
update_changelog_on_bump = true
|
||||
major_version_zero = true
|
||||
|
||||
# Update chart version only (NOT appVersion)
|
||||
version_files = [
|
||||
"Chart.yaml:^version:"
|
||||
]
|
||||
|
||||
# Ignore tags from other components
|
||||
ignored_tag_formats = [
|
||||
"v*", # MCP server tags
|
||||
"astrolabe-v*", # Astrolabe tags
|
||||
]
|
||||
|
||||
# Filter commits by scope
|
||||
# Includes helm-scoped commits AND MCP server version bumps (which update appVersion)
|
||||
[tool.commitizen.customize]
|
||||
changelog_pattern = "^((feat|fix|docs|refactor|perf|test|build|ci|chore)\\(helm\\)(!)?:|bump: version.*→.*)"
|
||||
schema_pattern = "^(feat|fix|docs|refactor|perf|test|build|ci|chore)\\(helm\\)(!)?:\\s.+"
|
||||
message_template = "{{change_type}}(helm): {{message}}"
|
||||
@@ -1,821 +0,0 @@
|
||||
# Changelog - Helm Chart
|
||||
|
||||
All notable changes to the Helm chart will be documented in this file.
|
||||
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
|
||||
### Added
|
||||
- Initial independent versioning release
|
||||
- Support for Nextcloud MCP server deployment
|
||||
- Qdrant subchart integration
|
||||
- Ollama subchart integration
|
||||
- Configurable resource limits
|
||||
- Grafana dashboard annotations
|
||||
|
||||
## nextcloud-mcp-server-0.56.0 (2025-12-26)
|
||||
|
||||
### Feat
|
||||
|
||||
- Remove URL rewriting in favor of proper nextcloud config
|
||||
- **helm**: migrate to new environment variable naming convention
|
||||
- Migrate to vue 3
|
||||
- **astrolabe**: upgrade to Vue 3 and @nextcloud/vue 9
|
||||
|
||||
### Fix
|
||||
|
||||
- **tests**: Add singleton reset fixture to prevent anyio.WouldBlock errors
|
||||
- **tests**: Fix integration test failures in qdrant, sampling, and rag tests
|
||||
- **auth**: Skip issuer validation for management API tokens
|
||||
- Use settings.enable_offline_access for env var consolidation
|
||||
- Add required config.py attributes
|
||||
- **docker**: remove overwritehost to fix container-to-container DCR
|
||||
- **deps**: update dependency @nextcloud/vue to v9
|
||||
- **deps**: update dependency vue to v3
|
||||
|
||||
### Refactor
|
||||
|
||||
- **auth**: Decouple BasicAuth and OAuth authentication strategies
|
||||
|
||||
## nextcloud-mcp-server-0.55.2 (2025-12-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: set OIDC client env vars when using existingSecret
|
||||
|
||||
## nextcloud-mcp-server-0.55.1 (2025-12-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: trigger chart release workflow on helm chart tags
|
||||
|
||||
## nextcloud-mcp-server-0.55.0 (2025-12-22)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- MCP server now bumps for ANY conventional commit except
|
||||
those explicitly scoped to helm or astrolabe.
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: add support for multi-user BasicAuth mode
|
||||
- **config**: enable DCR for multi-user BasicAuth with offline access
|
||||
- **astrolabe**: implement app password provisioning for multi-user background sync
|
||||
- **config**: consolidate configuration with smart dependency resolution (ADR-021)
|
||||
- **auth**: add multi-user BasicAuth pass-through mode
|
||||
- **astrolabe**: add dynamic MCP server configuration for testing
|
||||
- **ci**: add --increment flag to bump scripts for manual version control
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: address PR #447 reviewer feedback
|
||||
- **helm**: include MCP server version bumps in changelog pattern
|
||||
- **config**: address reviewer feedback
|
||||
- **astrolabe**: screenshots in info.xml
|
||||
- **astrolabe**: screenshots in info.xml
|
||||
- **astrolabe**: Update screenshots
|
||||
- **ci**: skip existing Helm chart releases to prevent duplicate release errors
|
||||
- **astrolabe**: add contents:write permission to appstore workflow
|
||||
- **astrolabe**: update commitizen pattern to properly update info.xml version
|
||||
- **astrolabe**: prevent workflow failure when only helm/astrolabe commits exist
|
||||
- **astrolabe**: info.xml
|
||||
- **ci**: push all tags explicitly in bump workflow
|
||||
- **ci**: make MCP server default bump target for all non-scoped commits
|
||||
- **ci**: restrict docker build to MCP server tags only
|
||||
- **ci**: correct appstore-push-action version to v1.0.4
|
||||
|
||||
### Refactor
|
||||
|
||||
- **config**: centralize configuration validation and simplify startup
|
||||
|
||||
## nextcloud-mcp-server-0.54.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- **ci**: implement monorepo-aware version bumping workflow
|
||||
- **astrolabe**: add Nextcloud App Store deployment automation
|
||||
- configure commitizen monorepo with independent versioning
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: improve versioning and error handling
|
||||
- **ci**: address critical workflow and validation issues
|
||||
- **astrolabe**: address code review feedback
|
||||
|
||||
## nextcloud-mcp-server-0.53.0 (2025-12-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- add Alembic database migration system
|
||||
- make chunk modal title clickable link to documents
|
||||
- add native Plotly hover styling for clickable points
|
||||
- add click interactivity to Plotly 3D scatter chart
|
||||
- improve chunk viewer with fixed navigation and markdown rendering
|
||||
- **astrolabe**: enable multi-select for document types and refactor PDF viewer
|
||||
- **auth**: implement refresh token rotation for Nextcloud OIDC
|
||||
- **astrolabe**: enhance unified search and add webhook management
|
||||
- **astrolabe**: add webhook management UI to admin settings
|
||||
- **astrolabe**: add OAuth token refresh and webhook presets
|
||||
- **search**: add file_path metadata and chunk offsets to search results
|
||||
- **astrolabe**: use proper icons and thumbnails in unified search
|
||||
- **astrolabe**: add admin search settings and enhanced UI
|
||||
- **astrolabe**: add unified search provider with clickable file links
|
||||
- **astrolabe**: add 3D PCA visualization for semantic search
|
||||
- **astrolabe**: add Nextcloud PHP app for MCP server management
|
||||
- **vector-sync**: enable background sync in OAuth mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **security**: address critical security issues from PR #401 code review
|
||||
- **oauth**: enable PKCE for all clients and add token_broker to oauth_context
|
||||
- **astrolabe**: revert invalid files_pdfviewer URL for file links
|
||||
- resolve type checking warnings for CI
|
||||
- move Alembic to package submodule for Docker compatibility
|
||||
- update unified search results to match chunk viz display
|
||||
- **astrolabe**: handle OAuth refresh token rotation
|
||||
- address critical code review issues (4 fixes)
|
||||
- resolve CI linting issues for Astroglobe
|
||||
|
||||
### Refactor
|
||||
|
||||
- **astrolabe**: extract PDF viewer to dedicated component
|
||||
- **astrolabe**: reframe UI as semantic search service
|
||||
|
||||
## nextcloud-mcp-server-0.52.1 (2025-12-13)
|
||||
|
||||
## nextcloud-mcp-server-0.52.0 (2025-12-13)
|
||||
|
||||
## nextcloud-mcp-server-0.51.0 (2025-12-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- **vector**: add Deck card vector search with visualization support
|
||||
- **vector-viz**: add news_item support for links and chunk expansion
|
||||
|
||||
### Perf
|
||||
|
||||
- **deck**: optimize card lookup by storing board_id/stack_id in metadata
|
||||
|
||||
## nextcloud-mcp-server-0.50.2 (2025-12-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- **news**: revert get_item() to use get_items() + filter
|
||||
|
||||
## nextcloud-mcp-server-0.50.1 (2025-12-12)
|
||||
|
||||
### Fix
|
||||
|
||||
- Disable DNS rebinding protection for containerized deployments
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
## nextcloud-mcp-server-0.50.0 (2025-12-11)
|
||||
|
||||
### Feat
|
||||
|
||||
- add MCP tool annotations for enhanced UX
|
||||
|
||||
### Fix
|
||||
|
||||
- address PR review feedback
|
||||
|
||||
## nextcloud-mcp-server-0.49.2 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update lockfile
|
||||
|
||||
## nextcloud-mcp-server-0.49.1 (2025-12-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- Revert mcp version <1.23
|
||||
|
||||
## nextcloud-mcp-server-0.49.0 (2025-12-08)
|
||||
|
||||
### Fix
|
||||
|
||||
- resolve all type checking errors (8 errors fixed)
|
||||
- **deps**: update dependency mcp to >=1.23,<1.24
|
||||
|
||||
### Perf
|
||||
|
||||
- **news**: use direct API endpoint for get_item()
|
||||
|
||||
## nextcloud-mcp-server-0.48.5 (2025-11-28)
|
||||
|
||||
### Feat
|
||||
|
||||
- **news**: add Nextcloud News app integration
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency pillow to v12
|
||||
|
||||
### Refactor
|
||||
|
||||
- **news**: simplify vector sync to fetch all items
|
||||
|
||||
## nextcloud-mcp-server-0.48.4 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Add rate limit retry logic to OpenAI provider
|
||||
|
||||
## nextcloud-mcp-server-0.48.3 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Increase MCP sampling timeout to 5 minutes for slower LLMs
|
||||
|
||||
## nextcloud-mcp-server-0.48.2 (2025-11-23)
|
||||
|
||||
### Fix
|
||||
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
|
||||
## nextcloud-mcp-server-0.48.1 (2025-11-23)
|
||||
|
||||
## nextcloud-mcp-server-0.48.0 (2025-11-23)
|
||||
|
||||
## nextcloud-mcp-server-0.47.0 (2025-11-23)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add tag management methods to WebDAV client
|
||||
- Add OpenAI provider support for embeddings and generation
|
||||
|
||||
### Fix
|
||||
|
||||
- Share vector sync state with FastMCP session lifespan via module singleton
|
||||
- Use WebDAV for tag creation and add LLM-as-a-judge for RAG tests
|
||||
|
||||
### Refactor
|
||||
|
||||
- Move background tasks to server lifespan and deprecate SSE transport
|
||||
|
||||
## nextcloud-mcp-server-0.46.2 (2025-11-22)
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Enable JSON response format for scanner compatibility
|
||||
|
||||
## nextcloud-mcp-server-0.46.1 (2025-11-22)
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize vector viz search performance
|
||||
|
||||
## nextcloud-mcp-server-0.46.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add Smithery CLI deployment support
|
||||
- Implement ADR-016 Smithery stateless deployment mode
|
||||
|
||||
### Fix
|
||||
|
||||
- **smithery**: Add JSON Schema metadata to mcp-config endpoint
|
||||
- **smithery**: Use container runtime pattern for config discovery
|
||||
- Add Smithery lifespan and auth mode detection
|
||||
|
||||
## nextcloud-mcp-server-0.45.0 (2025-11-22)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add context expansion to semantic search with chunk overlap removal
|
||||
- Use Ollama native batch API in embed_batch()
|
||||
- Implement Qdrant placeholder state management
|
||||
- Switch files to use numeric IDs with file_path resolution
|
||||
- Implement per-chunk vector visualization with context expansion
|
||||
|
||||
### Fix
|
||||
|
||||
- Use alpha_composite for proper RGBA highlight blending
|
||||
- Remove pymupdf.layout.activate() to fix page_chunks behavior
|
||||
- Centralize PDF processing and generate separate images per chunk
|
||||
- Set is_placeholder=False in processor to fix search filtering
|
||||
- Increase placeholder staleness threshold to 5x scan interval
|
||||
- Add placeholder staleness check to prevent duplicate processing
|
||||
- Use empty SparseVector instead of None for placeholders
|
||||
- Return empty array instead of null for query_coords when no results
|
||||
- Align PDF text extraction between indexing and context expansion
|
||||
- Update models and viz to use int-only doc_id
|
||||
- Reconstruct full content for notes to match indexed offsets
|
||||
- Add async/await, PDF metadata, and type safety fixes
|
||||
|
||||
### Refactor
|
||||
|
||||
- Simplify PDF text extraction with single to_markdown call
|
||||
|
||||
### Perf
|
||||
|
||||
- Optimize PDF processing with parallel extraction and single-render highlights
|
||||
|
||||
## nextcloud-mcp-server-0.44.1 (2025-11-21)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.22,<1.23
|
||||
|
||||
## nextcloud-mcp-server-0.44.0 (2025-11-19)
|
||||
|
||||
### Feat
|
||||
|
||||
- Improve vector visualization with static assets and fixes
|
||||
- Redesign UI to match Nextcloud ecosystem aesthetic
|
||||
|
||||
### Fix
|
||||
|
||||
- Improve 3D plot rendering with explicit dimensions and window resize support
|
||||
- Preserve 3D plot camera and improve documentation
|
||||
- Preserve 3D plot camera position and fix CSS loading
|
||||
|
||||
## nextcloud-mcp-server-0.43.0 (2025-11-18)
|
||||
|
||||
### Feat
|
||||
|
||||
- Replace custom document chunker with LangChain MarkdownTextSplitter
|
||||
|
||||
## nextcloud-mcp-server-0.42.0 (2025-11-17)
|
||||
|
||||
### Feat
|
||||
|
||||
- **viz**: Add dual-score display and improve UI controls
|
||||
|
||||
## nextcloud-mcp-server-0.41.0 (2025-11-17)
|
||||
|
||||
### Feat
|
||||
|
||||
- add configurable fusion algorithms for BM25 hybrid search
|
||||
- add chunk position tracking to vector indexing and search
|
||||
- add vector viz template and chunk context endpoint
|
||||
|
||||
### Fix
|
||||
|
||||
- prevent infinite loop in DocumentChunker with position tracking
|
||||
- Relax SearchResult validation to support DBSF fusion scores > 1.0
|
||||
|
||||
## nextcloud-mcp-server-0.40.0 (2025-11-16)
|
||||
|
||||
### Feat
|
||||
|
||||
- add unified provider architecture with Amazon Bedrock support
|
||||
|
||||
### Fix
|
||||
|
||||
- suppress Starlette middleware type warnings in ty checker
|
||||
|
||||
## nextcloud-mcp-server-0.39.0 (2025-11-16)
|
||||
|
||||
## nextcloud-mcp-server-0.38.0 (2025-11-16)
|
||||
|
||||
### Feat
|
||||
|
||||
- add concurrent uploads and --force flag to upload command
|
||||
- implement RAG evaluation framework with CLI tooling
|
||||
- Add OpenTelemetry tracing to @instrument_tool decorator
|
||||
- Implement BM25 hybrid search with native Qdrant RRF fusion
|
||||
|
||||
### Fix
|
||||
|
||||
- download qrels from BEIR ZIP instead of HuggingFace
|
||||
- Handle named vectors in visualization and semantic search
|
||||
- Update vizApp to use bm25_hybrid algorithm and remove deprecated weights
|
||||
- Update viz routes to use BM25 hybrid search after refactor
|
||||
|
||||
### Refactor
|
||||
|
||||
- migrate asyncio to anyio for consistent structured concurrency
|
||||
- replace httpx client with NextcloudClient in upload command
|
||||
|
||||
### Perf
|
||||
|
||||
- Eliminate double-fetching in semantic search sampling
|
||||
- fix vector viz search performance and visual encoding
|
||||
- make note deletion concurrent in upload --force
|
||||
|
||||
## nextcloud-mcp-server-0.36.0 (2025-11-15)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- Search algorithms now require Qdrant to be populated.
|
||||
Vector sync must be enabled and documents indexed for search to work.
|
||||
|
||||
### Feat
|
||||
|
||||
- Normalize hybrid search RRF scores to 0-1 range
|
||||
- Enhance vector visualization UI and parallelize search verification
|
||||
- Add Vector Viz tab to app home page
|
||||
- Add vector visualization pane with multi-select document types
|
||||
- Implement custom PCA to remove sklearn dependency
|
||||
- Add multi-document Protocol with cross-app search support
|
||||
- Update nc_semantic_search tool with algorithm selection
|
||||
- Implement unified search algorithm module
|
||||
|
||||
### Fix
|
||||
|
||||
- Reorder tabs and fix viz pane session access
|
||||
|
||||
### Refactor
|
||||
|
||||
- Optimize Nextcloud access verification with centralized filtering
|
||||
- Make all search algorithms query Qdrant payload, not Nextcloud
|
||||
|
||||
### Perf
|
||||
|
||||
- Exclude vector-sync status polling from distributed tracing
|
||||
|
||||
## nextcloud-mcp-server-0.35.0 (2025-11-15)
|
||||
|
||||
### Feat
|
||||
|
||||
- Enable SSE transport for mcp service and update test fixtures
|
||||
|
||||
## nextcloud-mcp-server-0.34.2 (2025-11-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- Use NEXTCLOUD_OIDC_CLIENT_ID/SECRET env vars consistently
|
||||
- return all notes when search query is empty
|
||||
|
||||
## nextcloud-mcp-server-0.34.0 (2025-11-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- Complete Phase 5 - Instrument all 93 MCP tools
|
||||
- Add instrumentation decorator and apply to notes tools (Phase 5)
|
||||
- Add OAuth token and database metrics (Phases 3-4)
|
||||
- Add metrics instrumentation for queue, health, and database operations
|
||||
|
||||
## nextcloud-mcp-server-0.33.1 (2025-11-13)
|
||||
|
||||
### Fix
|
||||
|
||||
- Move grafana_folder from labels to annotations
|
||||
|
||||
## nextcloud-mcp-server-0.33.0 (2025-11-13)
|
||||
|
||||
### Feat
|
||||
|
||||
- Add Grafana dashboard and vector sync metric instrumentation
|
||||
|
||||
## nextcloud-mcp-server-0.32.1 (2025-11-12)
|
||||
|
||||
### Fix
|
||||
|
||||
- add dynamic dimension detection for Ollama embedding models
|
||||
|
||||
## nextcloud-mcp-server-0.32.0 (2025-11-11)
|
||||
|
||||
### Feat
|
||||
|
||||
- **ollama**: Pull model on startup if not available in ollama
|
||||
- add dynamic vector sync status updates with htmx polling
|
||||
- add webhook management UI and BeforeNodeDeletedEvent support
|
||||
- validate Nextcloud webhook schemas and document findings
|
||||
|
||||
### Fix
|
||||
|
||||
- improve webapp tab UI with CSS Grid and viewport-filling container
|
||||
|
||||
### Refactor
|
||||
|
||||
- move webapp from /user/page to /app
|
||||
- consolidate database storage for webhooks and OAuth tokens
|
||||
|
||||
## nextcloud-mcp-server-0.31.1 (2025-11-10)
|
||||
|
||||
### Refactor
|
||||
|
||||
- simplify OpenTelemetry tracing configuration
|
||||
|
||||
## nextcloud-mcp-server-0.31.0 (2025-11-10)
|
||||
|
||||
### Feat
|
||||
|
||||
- skip tracing for health and metrics endpoints
|
||||
|
||||
### Fix
|
||||
|
||||
- add retry logic for ETag conflicts in category change test
|
||||
- optimize Notes API pagination with pruneBefore parameter
|
||||
|
||||
## nextcloud-mcp-server-0.30.0 (2025-11-10)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: Add document chunking configuration
|
||||
- **vector**: Add configurable chunk size and overlap for document embedding
|
||||
- **vector**: Support multiple embedding models with auto-generated collection names
|
||||
|
||||
### Fix
|
||||
|
||||
- Support in-memory Qdrant for CI testing
|
||||
|
||||
## nextcloud-mcp-server-0.29.2 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Set default strategy to Recreate
|
||||
|
||||
## nextcloud-mcp-server-0.29.1 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **observability**: isolate metrics endpoint to dedicated port
|
||||
|
||||
## nextcloud-mcp-server-0.29.0 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: Add observability support with ServiceMonitor and Grafana dashboard
|
||||
|
||||
### Fix
|
||||
|
||||
- **readiness**: Only check external Qdrant in network mode
|
||||
|
||||
## nextcloud-mcp-server-0.28.0 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
- **observability**: Add comprehensive monitoring with Prometheus and OpenTelemetry
|
||||
|
||||
### Fix
|
||||
|
||||
- **vector**: Handle missing 'modified' field in notes gracefully
|
||||
|
||||
## nextcloud-mcp-server-0.27.3 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: Use helm dependency build instead of update to use Chart.lock
|
||||
|
||||
## nextcloud-mcp-server-0.27.2 (2025-11-09)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: update Qdrant dependency condition to match new mode structure
|
||||
|
||||
## nextcloud-mcp-server-0.27.1 (2025-11-09)
|
||||
|
||||
### Feat
|
||||
|
||||
- **helm**: add Qdrant local mode support with three deployment options [skip ci]
|
||||
- add Qdrant local mode support with in-memory and persistent storage
|
||||
- implement ADR-009 - refactor semantic search to use generic semantic:read scope
|
||||
- implement MCP sampling for semantic search RAG (ADR-008)
|
||||
- add optional vector database and semantic search to helm chart
|
||||
- add vector sync processing status to /user/page endpoint
|
||||
- implement semantic search tool and fix vector sync issues (ADR-007 Phase 3)
|
||||
- implement vector sync scanner and processor (ADR-007 Phase 2)
|
||||
|
||||
### Fix
|
||||
|
||||
- **ci**: add Helm repository setup to chart release workflow
|
||||
- implement deletion grace period and vector sync status tool
|
||||
- remove unnecessary urllib3<2.0 constraint
|
||||
- integrate vector sync tasks with Starlette lifespan for streamable-http
|
||||
|
||||
### Refactor
|
||||
|
||||
- migrate vector sync from asyncio.Queue to anyio memory object streams
|
||||
- update to Qdrant query_points API and fix Playwright Keycloak login
|
||||
|
||||
## nextcloud-mcp-server-0.26.1 (2025-11-08)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.21,<1.22
|
||||
|
||||
## nextcloud-mcp-server-0.26.0 (2025-11-08)
|
||||
|
||||
### Feat
|
||||
|
||||
- add real elicitation integration test with python-sdk MCP client
|
||||
- unify session architecture and enhance login status visibility
|
||||
|
||||
### Fix
|
||||
|
||||
- Consolidate OAuth callbacks and implement PKCE for all flows
|
||||
|
||||
## nextcloud-mcp-server-0.25.0 (2025-11-05)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- All OAuth deployments must be reconfigured to specify
|
||||
resource URIs (NEXTCLOUD_MCP_SERVER_URL and NEXTCLOUD_RESOURCE_URI) and
|
||||
choose between multi-audience or token exchange mode.
|
||||
|
||||
### Feat
|
||||
|
||||
- Implement ADR-005 unified token verifier to eliminate token passthrough vulnerability
|
||||
|
||||
### Fix
|
||||
|
||||
- Implement proper OAuth resource parameters and PRM-based discovery
|
||||
- Simplify token verifier to be RFC 7519 compliant
|
||||
- Use Keycloak client ID for NEXTCLOUD_RESOURCE_URI in token exchange
|
||||
- Correct OAuth token audience validation for multi-audience mode
|
||||
|
||||
### Refactor
|
||||
|
||||
- Eliminate duplicate validation logic in UnifiedTokenVerifier
|
||||
|
||||
## nextcloud-mcp-server-0.24.1 (2025-11-04)
|
||||
|
||||
### Fix
|
||||
|
||||
- **deps**: update dependency mcp to >=1.20,<1.21
|
||||
|
||||
## nextcloud-mcp-server-0.24.0 (2025-11-04)
|
||||
|
||||
### Feat
|
||||
|
||||
- add scope protection to OAuth provisioning tools
|
||||
- enable authorization services for token exchange in Keycloak
|
||||
- implement scope-based audience mapping and RFC 9728 support
|
||||
- integrate token exchange into MCP server application
|
||||
- implement RFC 8693 Standard Token Exchange for Keycloak
|
||||
- Add userinfo route/page
|
||||
- add browser-based user info page with separate OAuth flow
|
||||
- Implement ADR-004 Progressive Consent foundation (partial)
|
||||
- Complete ADR-004 Progressive Consent OAuth flows implementation
|
||||
- Implement ADR-004 Progressive Consent foundation components
|
||||
- Implement ADR-004 Hybrid Flow with comprehensive integration tests
|
||||
|
||||
### Fix
|
||||
|
||||
- add missing await for get_nextcloud_client in capabilities resource
|
||||
- use valid Fernet encryption keys in token exchange tests
|
||||
- accept resource URL in token audience for Nextcloud JWT tokens
|
||||
- remove token-exchange-nextcloud scope and accept tokens without audience
|
||||
- move audience mapper from scope to nextcloud-mcp-server client
|
||||
- move token-exchange-nextcloud from default to optional scopes
|
||||
- restructure routes to prevent SessionAuthBackend from interfering with FastMCP OAuth
|
||||
- allow OAuth Bearer tokens on /mcp endpoint by excluding from session auth
|
||||
- correct OAuth token audience validation using RFC 8707 resource parameter
|
||||
- remove remaining references to deleted oauth_callback and oauth_token
|
||||
- remove Hybrid Flow, make Progressive Consent default (ADR-004)
|
||||
- browser OAuth userinfo endpoint and refresh token rotation
|
||||
- make ENABLE_PROGRESSIVE_CONSENT consistently opt-in (default false)
|
||||
- make provisioning checks opt-in (default false)
|
||||
- Disable Progressive Consent for mcp-oauth to enable Hybrid Flow tests
|
||||
|
||||
### Refactor
|
||||
|
||||
- integrate token exchange into unified get_client() pattern
|
||||
|
||||
## nextcloud-mcp-server-0.23.0 (2025-11-03)
|
||||
|
||||
### Feat
|
||||
|
||||
- Auto-configure impersonation role in Keycloak realm import
|
||||
- Implement dual-tier token exchange (Standard V2 + Legacy V1 impersonation)
|
||||
- Add Keycloak external IdP integration with custom scopes
|
||||
- Implement RFC 8693 token exchange for Keycloak (ADR-002 Tier 2)
|
||||
- Add Keycloak OAuth provider support with refresh token storage
|
||||
|
||||
### Fix
|
||||
|
||||
- Complete Keycloak external IdP integration with all tests passing
|
||||
- Complete Keycloak external IdP integration with all tests passing
|
||||
- Update DCR token_type tests for OIDC app changes
|
||||
|
||||
### Refactor
|
||||
|
||||
- Remove NEXTCLOUD_OIDC_CLIENT_STORAGE environment variable
|
||||
- Remove unnecessary user_oidc patch - CORSMiddleware patch is sufficient
|
||||
- Unify OAuth configuration to be provider-agnostic
|
||||
|
||||
## nextcloud-mcp-server-0.22.7 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Remove image tag overide
|
||||
|
||||
## nextcloud-mcp-server-0.22.6 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm chart with extraArgs
|
||||
|
||||
## nextcloud-mcp-server-0.22.5 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- Update helm chart variables
|
||||
|
||||
## nextcloud-mcp-server-0.22.4 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
- **helm**: Update helm version with release
|
||||
- **helm**: Update helm version with release
|
||||
|
||||
## nextcloud-mcp-server-0.1.1 (2025-10-29)
|
||||
|
||||
### Fix
|
||||
|
||||
- **helm**: Update helm version with release
|
||||
- Trigger release
|
||||
|
||||
## nextcloud-mcp-server-0.1.0 (2025-10-29)
|
||||
|
||||
### BREAKING CHANGE
|
||||
|
||||
- FASTMCP_-prefixed env vars have been replaced by CLI
|
||||
arguments. Refer to the README for updated usage.
|
||||
|
||||
### Feat
|
||||
|
||||
- **server**: Add /live & /health endpoints
|
||||
- Initialize helm chart
|
||||
- Add text processing background worker for telling client about progress
|
||||
- **auth**: Add support for client registration deletion
|
||||
- Split read/write scopes into app:read/write scopes
|
||||
- Enable token introspection for opaque tokens
|
||||
- **server**: Add support for custom OIDC scopes and permissions via JWTs
|
||||
- Initialize JWT-scoped tools
|
||||
- **caldav**: Add support for tasks
|
||||
- **webdav**: Add search and list favorite response tools
|
||||
- **cookbook**: Add full Cookbook app support with 13 tools and 2 resources
|
||||
- Add Groups API client
|
||||
- add sharing API client and server tools
|
||||
- **server**: Experimental support for OAuth2/OIDC authentication
|
||||
- **users**: Initialize user API client
|
||||
- **server**: Add support for `streamable-http` transport type
|
||||
- Add WebDAV resource copy functionality
|
||||
- Add WebDAV resource move/rename functionality
|
||||
- **deck**: Add support for stack, cards, labels
|
||||
- **deck**: Initialize Deck app client/server
|
||||
- **cli**: Replace `mcp run` with click CLI and runtime options
|
||||
- **client**: Preserve fields when modifying contacts/calendar resources
|
||||
- **server**: Add structured output to all tool/resource output
|
||||
- **contacts**: Initialize Contacts App
|
||||
- **calendar**: add comprehensive Calendar app support via CalDAV protocol
|
||||
- Update webdav client create_directory method to handle recursive directories
|
||||
- **webdav**: add complete file system support
|
||||
- Add TablesClient and associated tools
|
||||
- Switch to using async client
|
||||
- **notes**: Add append to note functionality
|
||||
|
||||
### Fix
|
||||
|
||||
- Add support for RFC 7592 client registration and deletion
|
||||
- Update webdav models for proper serialization
|
||||
- **deps**: update dependency mcp to >=1.19,<1.20
|
||||
- Add CORS middleware to allow browser-based clients like MCP Inspector
|
||||
- Use occ-created OAuth clients with allowed_scopes for all tests
|
||||
- Separate OAuth fixtures for opaque vs JWT tokens
|
||||
- **caldav**: Fix caldav search() due to missing todos
|
||||
- **caldav**: Check that calendar exists after creation to avoid race condition
|
||||
- **caldav**: Properly parse datetimes as vDDDTypes
|
||||
- Increase HTTP client timeout to 30s
|
||||
- Handle RequestError in mcp tools
|
||||
- **deps**: update dependency mcp to >=1.18,<1.19
|
||||
- **deps**: update dependency pillow to v12
|
||||
- **oauth**: Remove the option to force_register new clients
|
||||
- Update user/groups API to OCS v2
|
||||
- **deps**: update dependency mcp to >=1.17,<1.18
|
||||
- **deps**: update dependency mcp to >=1.16,<1.17
|
||||
- **deps**: update dependency mcp to >=1.15,<1.16
|
||||
- **docker**: Provide --host 0.0.0.0 in default docker image
|
||||
- **deps**: update dependency mcp to >=1.13,<1.14
|
||||
- **server**: Replace ErrorResponses with standard McpErrors
|
||||
- **notes**: Include ETags in responses to avoid accidently updates
|
||||
- **notes**: Remove note contents from responses to reduce token usage
|
||||
- **model**: Serialize timestamps in RFC3339 format
|
||||
- **client**: Use paging to fetch all notes
|
||||
- **client**: Strip cookies from responses to avoid falsely raising CSRF errors
|
||||
- **calendar**: Fix iCalendar date vs datetime format
|
||||
- **calendar**: Remove try/except in calendar API
|
||||
- apply ruff formatting to pass CI checks
|
||||
- **calendar**: address PR feedback from maintainer
|
||||
- apply ruff formatting to test_webdav_operations.py
|
||||
- **deps**: update dependency mcp to >=1.10,<1.11
|
||||
- update tests
|
||||
- Commitizen release process
|
||||
- Do not update dependencies when running in Dockerfile
|
||||
- Configure logging
|
||||
- Limit search results to notes with score > 0.5
|
||||
- Install deps before checking service
|
||||
- **deps**: update dependency mcp to >=1.9,<1.10
|
||||
|
||||
### Refactor
|
||||
|
||||
- Transform document parsing into pluggable processor architecture
|
||||
- Update JWT client to use DCR, re-enable tool filtering
|
||||
- Migrate from internal CalendarClient to caldav library
|
||||
- Unify logging & remove factory deployment
|
||||
- Add tools for all resources to enable tool-only workflows
|
||||
- Add `http` to --transport option
|
||||
- Use _make_request where available
|
||||
- **calendar**: optimize logging for production readiness
|
||||
- Modularize NC and Notes app client
|
||||
|
||||
### Perf
|
||||
|
||||
- **notes**: Improve notes search performance using async iterators
|
||||
@@ -1,9 +1,9 @@
|
||||
dependencies:
|
||||
- name: qdrant
|
||||
repository: https://qdrant.github.io/qdrant-helm
|
||||
version: 1.16.3
|
||||
version: 1.16.0
|
||||
- name: ollama
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
version: 1.36.0
|
||||
digest: sha256:7f0979ec4110ff41ebeb55bf586b41366a350cc39fe65a2da7d2da03f723fe9b
|
||||
generated: "2025-12-22T11:09:39.166328543Z"
|
||||
version: 1.34.0
|
||||
digest: sha256:9dfb8d6e3d5488f669d4c37f3a766213b598ff3de2aead2c734789736c7835b4
|
||||
generated: "2025-11-17T17:08:48.055530019Z"
|
||||
|
||||
@@ -2,8 +2,8 @@ apiVersion: v2
|
||||
name: nextcloud-mcp-server
|
||||
description: A Helm chart for Nextcloud MCP Server - enables AI assistants to interact with Nextcloud
|
||||
type: application
|
||||
version: 0.56.0
|
||||
appVersion: "0.60.0"
|
||||
version: 0.44.0
|
||||
appVersion: "0.44.0"
|
||||
keywords:
|
||||
- nextcloud
|
||||
- mcp
|
||||
@@ -27,10 +27,10 @@ annotations:
|
||||
grafana_dashboard_folder: "Nextcloud MCP"
|
||||
dependencies:
|
||||
- name: qdrant
|
||||
version: "1.16.3"
|
||||
version: "1.16.0"
|
||||
repository: https://qdrant.github.io/qdrant-helm
|
||||
condition: qdrant.networkMode.deploySubchart
|
||||
- name: ollama
|
||||
version: "1.36.0"
|
||||
version: "1.34.0"
|
||||
repository: https://otwld.github.io/ollama-helm
|
||||
condition: ollama.enabled
|
||||
|
||||
@@ -99,11 +99,11 @@ ingress:
|
||||
|-----------|-------------|---------|
|
||||
| `nextcloud.host` | URL of your Nextcloud instance (required) | `""` |
|
||||
| `nextcloud.mcpServerUrl` | MCP server URL for OAuth callbacks (OAuth only, optional) | Smart default* |
|
||||
| `nextcloud.publicIssuerUrl` | Public URL for browser-accessible OAuth authorization endpoint (OAuth only, optional) | Smart default** |
|
||||
| `nextcloud.publicIssuerUrl` | Public issuer URL for OAuth (OAuth only, optional) | Smart default** |
|
||||
|
||||
**Smart Defaults:**
|
||||
- `*mcpServerUrl`: If not set, automatically uses ingress host (if enabled) or `http://localhost:8000` (for port-forward setups)
|
||||
- `**publicIssuerUrl`: If not set, defaults to `nextcloud.host`. **Only used for authorization endpoints** that browsers must access. All server-to-server endpoints (token, JWKS, introspection, userinfo) use URLs from OIDC discovery without rewriting
|
||||
- `**publicIssuerUrl`: If not set, automatically defaults to `nextcloud.host` (which works when both clients and MCP server access Nextcloud at the same URL)
|
||||
|
||||
#### Authentication
|
||||
|
||||
@@ -208,16 +208,16 @@ The application exposes HTTP health check endpoints:
|
||||
|
||||
#### Vector Search & Semantic Capabilities (Optional)
|
||||
|
||||
Enable semantic search capabilities with BM25 hybrid search by deploying a vector database (Qdrant) and embedding service (Ollama or OpenAI).
|
||||
Enable semantic search capabilities by deploying a vector database (Qdrant) and embedding service (Ollama or OpenAI).
|
||||
|
||||
**Semantic Search Configuration:**
|
||||
**Vector Sync Configuration:**
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `semanticSearch.enabled` | Enable semantic search and background vector synchronization | `false` |
|
||||
| `semanticSearch.scanInterval` | Scan interval in seconds | `3600` |
|
||||
| `semanticSearch.processorWorkers` | Number of concurrent processor workers | `3` |
|
||||
| `semanticSearch.queueMaxSize` | Maximum queue size for pending documents | `10000` |
|
||||
| `vectorSync.enabled` | Enable background vector synchronization | `false` |
|
||||
| `vectorSync.scanInterval` | Scan interval in seconds | `3600` |
|
||||
| `vectorSync.processorWorkers` | Number of concurrent processor workers | `3` |
|
||||
| `vectorSync.queueMaxSize` | Maximum queue size for pending documents | `10000` |
|
||||
|
||||
**Document Chunking Configuration:**
|
||||
|
||||
@@ -427,7 +427,7 @@ nextcloud:
|
||||
host: https://cloud.example.com
|
||||
# mcpServerUrl and publicIssuerUrl are optional!
|
||||
# If not set, mcpServerUrl defaults to ingress host or localhost
|
||||
# publicIssuerUrl defaults to nextcloud.host (only used for browser-accessible auth endpoint)
|
||||
# publicIssuerUrl defaults to nextcloud.host
|
||||
|
||||
auth:
|
||||
mode: oauth
|
||||
@@ -459,7 +459,7 @@ This example shows OAuth without pre-registered credentials (using DCR) and opti
|
||||
nextcloud:
|
||||
host: https://cloud.example.com
|
||||
# mcpServerUrl will automatically use ingress host (https://mcp.example.com)
|
||||
# publicIssuerUrl will automatically default to nextcloud.host (only used for browser-accessible auth endpoint)
|
||||
# publicIssuerUrl will automatically default to nextcloud.host
|
||||
|
||||
auth:
|
||||
mode: oauth
|
||||
@@ -537,8 +537,8 @@ auth:
|
||||
username: admin
|
||||
password: secure-password
|
||||
|
||||
# Enable semantic search
|
||||
semanticSearch:
|
||||
# Enable vector sync
|
||||
vectorSync:
|
||||
enabled: true
|
||||
scanInterval: 1800 # Scan every 30 minutes
|
||||
processorWorkers: 5
|
||||
@@ -576,7 +576,7 @@ ollama:
|
||||
Or use an external Ollama instance:
|
||||
|
||||
```yaml
|
||||
semanticSearch:
|
||||
vectorSync:
|
||||
enabled: true
|
||||
|
||||
qdrant:
|
||||
@@ -592,7 +592,7 @@ ollama:
|
||||
Or use OpenAI for embeddings:
|
||||
|
||||
```yaml
|
||||
semanticSearch:
|
||||
vectorSync:
|
||||
enabled: true
|
||||
|
||||
qdrant:
|
||||
@@ -689,9 +689,7 @@ Readiness (returns 200 if ready, 503 if not ready):
|
||||
|
||||
1. **Connection refused to Nextcloud**
|
||||
- Verify `nextcloud.host` is accessible from the Kubernetes cluster
|
||||
- For OAuth mode: Ensure MCP server can reach OIDC discovery endpoints (token, JWKS, introspection, userinfo URLs)
|
||||
- Check network policies and firewall rules
|
||||
- Note: Do not use internal Docker hostnames (like `http://app:80`) for `nextcloud.host` - use externally resolvable URLs
|
||||
|
||||
2. **Authentication failures**
|
||||
- For basic auth: verify username/password are correct
|
||||
|
||||
@@ -69,12 +69,12 @@ Your Nextcloud MCP Server has been deployed in {{ .Values.auth.mode }} authentic
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{- if .Values.semanticSearch.enabled }}
|
||||
{{- if .Values.vectorSync.enabled }}
|
||||
|
||||
5. Semantic Search & Vector Capabilities:
|
||||
- Semantic Search: Enabled
|
||||
- Scan Interval: {{ .Values.semanticSearch.scanInterval }}s
|
||||
- Processor Workers: {{ .Values.semanticSearch.processorWorkers }}
|
||||
5. Vector Search & Semantic Capabilities:
|
||||
- Vector Sync: Enabled
|
||||
- Scan Interval: {{ .Values.vectorSync.scanInterval }}s
|
||||
- Processor Workers: {{ .Values.vectorSync.processorWorkers }}
|
||||
{{- if .Values.qdrant.enabled }}
|
||||
- Qdrant: Deployed as subchart ({{ .Release.Name }}-qdrant:6333)
|
||||
{{- else }}
|
||||
|
||||
@@ -72,28 +72,6 @@ Create the name of the secret to use for basic auth
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the secret to use for multi-user basic auth
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.multiUserBasicSecretName" -}}
|
||||
{{- if .Values.auth.multiUserBasic.existingSecret }}
|
||||
{{- .Values.auth.multiUserBasic.existingSecret }}
|
||||
{{- else }}
|
||||
{{- include "nextcloud-mcp-server.fullname" . }}-multi-user-basic
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the PVC to use for multi-user basic token storage
|
||||
*/}}
|
||||
{{- define "nextcloud-mcp-server.multiUserBasicPvcName" -}}
|
||||
{{- if .Values.auth.multiUserBasic.persistence.existingClaim }}
|
||||
{{- .Values.auth.multiUserBasic.persistence.existingClaim }}
|
||||
{{- else }}
|
||||
{{- include "nextcloud-mcp-server.fullname" . }}-token-storage
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the secret to use for OAuth
|
||||
*/}}
|
||||
|
||||
@@ -68,7 +68,7 @@ spec:
|
||||
- name: NEXTCLOUD_HOST
|
||||
value: {{ .Values.nextcloud.host | quote }}
|
||||
{{- if eq .Values.auth.mode "basic" }}
|
||||
# Basic auth mode (single-user)
|
||||
# Basic auth mode
|
||||
- name: NEXTCLOUD_USERNAME
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
@@ -79,41 +79,6 @@ spec:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.basicAuthSecretName" . }}
|
||||
key: {{ .Values.auth.basic.passwordKey }}
|
||||
{{- else if eq .Values.auth.mode "multi-user-basic" }}
|
||||
# Multi-user BasicAuth mode (pass-through)
|
||||
- name: ENABLE_MULTI_USER_BASIC_AUTH
|
||||
value: "true"
|
||||
- name: NEXTCLOUD_MCP_SERVER_URL
|
||||
value: {{ include "nextcloud-mcp-server.mcpServerUrl" . | quote }}
|
||||
- name: NEXTCLOUD_PUBLIC_ISSUER_URL
|
||||
value: {{ include "nextcloud-mcp-server.publicIssuerUrl" . | quote }}
|
||||
{{- if .Values.auth.multiUserBasic.enableOfflineAccess }}
|
||||
# Background operations with app passwords (replaces deprecated ENABLE_OFFLINE_ACCESS)
|
||||
- name: ENABLE_BACKGROUND_OPERATIONS
|
||||
value: "true"
|
||||
- name: TOKEN_STORAGE_DB
|
||||
value: {{ .Values.auth.multiUserBasic.tokenStorageDb | quote }}
|
||||
- name: TOKEN_ENCRYPTION_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.multiUserBasicSecretName" . }}
|
||||
key: {{ .Values.auth.multiUserBasic.tokenEncryptionKeyKey }}
|
||||
- name: NEXTCLOUD_OIDC_SCOPES
|
||||
value: {{ .Values.auth.multiUserBasic.scopes | quote }}
|
||||
{{- if or .Values.auth.multiUserBasic.clientId .Values.auth.multiUserBasic.existingSecret }}
|
||||
# Static OAuth credentials (optional - uses DCR if not provided)
|
||||
- name: NEXTCLOUD_OIDC_CLIENT_ID
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.multiUserBasicSecretName" . }}
|
||||
key: {{ .Values.auth.multiUserBasic.clientIdKey }}
|
||||
- name: NEXTCLOUD_OIDC_CLIENT_SECRET
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: {{ include "nextcloud-mcp-server.multiUserBasicSecretName" . }}
|
||||
key: {{ .Values.auth.multiUserBasic.clientSecretKey }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- else if eq .Values.auth.mode "oauth" }}
|
||||
# OAuth mode
|
||||
- name: NEXTCLOUD_MCP_SERVER_URL
|
||||
@@ -122,7 +87,7 @@ spec:
|
||||
value: {{ include "nextcloud-mcp-server.publicIssuerUrl" . | quote }}
|
||||
- name: NEXTCLOUD_OIDC_SCOPES
|
||||
value: {{ .Values.auth.oauth.scopes | quote }}
|
||||
{{- if or .Values.auth.oauth.clientId .Values.auth.oauth.existingSecret }}
|
||||
{{- if .Values.auth.oauth.clientId }}
|
||||
- name: NEXTCLOUD_OIDC_CLIENT_ID
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
@@ -182,16 +147,16 @@ spec:
|
||||
value: {{ .Values.documentProcessing.custom.types | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
# Semantic Search (replaces deprecated VECTOR_SYNC_ENABLED)
|
||||
- name: ENABLE_SEMANTIC_SEARCH
|
||||
value: {{ .Values.semanticSearch.enabled | quote }}
|
||||
{{- if .Values.semanticSearch.enabled }}
|
||||
# Vector Sync
|
||||
- name: VECTOR_SYNC_ENABLED
|
||||
value: {{ .Values.vectorSync.enabled | quote }}
|
||||
{{- if .Values.vectorSync.enabled }}
|
||||
- name: VECTOR_SYNC_SCAN_INTERVAL
|
||||
value: {{ .Values.semanticSearch.scanInterval | quote }}
|
||||
value: {{ .Values.vectorSync.scanInterval | quote }}
|
||||
- name: VECTOR_SYNC_PROCESSOR_WORKERS
|
||||
value: {{ .Values.semanticSearch.processorWorkers | quote }}
|
||||
value: {{ .Values.vectorSync.processorWorkers | quote }}
|
||||
- name: VECTOR_SYNC_QUEUE_MAX_SIZE
|
||||
value: {{ .Values.semanticSearch.queueMaxSize | quote }}
|
||||
value: {{ .Values.vectorSync.queueMaxSize | quote }}
|
||||
{{- end }}
|
||||
# Document Chunking (always set, used by vector sync processor)
|
||||
- name: DOCUMENT_CHUNK_SIZE
|
||||
@@ -286,10 +251,6 @@ spec:
|
||||
- name: oauth-storage
|
||||
mountPath: /app/.oauth
|
||||
{{- end }}
|
||||
{{- if and (eq .Values.auth.mode "multi-user-basic") .Values.auth.multiUserBasic.enableOfflineAccess .Values.auth.multiUserBasic.persistence.enabled }}
|
||||
- name: token-storage
|
||||
mountPath: /app/data
|
||||
{{- end }}
|
||||
{{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled }}
|
||||
- name: qdrant-data
|
||||
mountPath: /app/data
|
||||
@@ -305,11 +266,6 @@ spec:
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ include "nextcloud-mcp-server.oauthPvcName" . }}
|
||||
{{- end }}
|
||||
{{- if and (eq .Values.auth.mode "multi-user-basic") .Values.auth.multiUserBasic.enableOfflineAccess .Values.auth.multiUserBasic.persistence.enabled }}
|
||||
- name: token-storage
|
||||
persistentVolumeClaim:
|
||||
claimName: {{ include "nextcloud-mcp-server.multiUserBasicPvcName" . }}
|
||||
{{- end }}
|
||||
{{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled }}
|
||||
- name: qdrant-data
|
||||
persistentVolumeClaim:
|
||||
|
||||
@@ -16,24 +16,6 @@ spec:
|
||||
storage: {{ .Values.auth.oauth.persistence.size }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if and (eq .Values.auth.mode "multi-user-basic") .Values.auth.multiUserBasic.enableOfflineAccess .Values.auth.multiUserBasic.persistence.enabled (not .Values.auth.multiUserBasic.persistence.existingClaim) }}
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}-token-storage
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
spec:
|
||||
accessModes:
|
||||
- {{ .Values.auth.multiUserBasic.persistence.accessMode }}
|
||||
{{- if .Values.auth.multiUserBasic.persistence.storageClass }}
|
||||
storageClassName: {{ .Values.auth.multiUserBasic.persistence.storageClass }}
|
||||
{{- end }}
|
||||
resources:
|
||||
requests:
|
||||
storage: {{ .Values.auth.multiUserBasic.persistence.size }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if and (eq .Values.qdrant.mode "persistent") .Values.qdrant.localPersistence.enabled (not .Values.qdrant.localPersistence.existingClaim) }}
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
|
||||
@@ -13,24 +13,6 @@ data:
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if eq .Values.auth.mode "multi-user-basic" }}
|
||||
{{- if and .Values.auth.multiUserBasic.enableOfflineAccess (not .Values.auth.multiUserBasic.existingSecret) }}
|
||||
apiVersion: v1
|
||||
kind: Secret
|
||||
metadata:
|
||||
name: {{ include "nextcloud-mcp-server.fullname" . }}-multi-user-basic
|
||||
labels:
|
||||
{{- include "nextcloud-mcp-server.labels" . | nindent 4 }}
|
||||
type: Opaque
|
||||
data:
|
||||
{{ .Values.auth.multiUserBasic.tokenEncryptionKeyKey }}: {{ .Values.auth.multiUserBasic.tokenEncryptionKey | b64enc | quote }}
|
||||
{{- if .Values.auth.multiUserBasic.clientId }}
|
||||
{{ .Values.auth.multiUserBasic.clientIdKey }}: {{ .Values.auth.multiUserBasic.clientId | b64enc | quote }}
|
||||
{{ .Values.auth.multiUserBasic.clientSecretKey }}: {{ .Values.auth.multiUserBasic.clientSecret | b64enc | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
---
|
||||
{{- if eq .Values.auth.mode "oauth" }}
|
||||
{{- if and .Values.auth.oauth.clientId (not .Values.auth.oauth.existingSecret) }}
|
||||
apiVersion: v1
|
||||
|
||||
@@ -26,29 +26,21 @@ nextcloud:
|
||||
# Example: https://mcp.example.com
|
||||
mcpServerUrl: ""
|
||||
|
||||
# Public issuer URL for browser-accessible OAuth authorization endpoints (OAuth mode only)
|
||||
# ONLY used to make authorization endpoints accessible to users' browsers
|
||||
# All server-to-server communication (token endpoint, JWKS, introspection, userinfo)
|
||||
# uses URLs from OIDC discovery without any rewriting
|
||||
#
|
||||
# Use case: When MCP server accesses Nextcloud at one URL but browsers need a different
|
||||
# public URL for OAuth login (e.g., server uses internal DNS, browsers use public domain)
|
||||
#
|
||||
# If not specified, defaults to nextcloud.host (works when MCP server and browsers
|
||||
# both access Nextcloud at the same URL)
|
||||
# Public issuer URL for OAuth (OAuth mode only)
|
||||
# If not specified, defaults to nextcloud.host
|
||||
# Only set this if your Nextcloud is accessible at a different URL for OAuth
|
||||
# Example: https://cloud.example.com
|
||||
publicIssuerUrl: ""
|
||||
|
||||
# Authentication configuration
|
||||
# Choose one mode: "basic", "multi-user-basic", or "oauth"
|
||||
# Choose either basic auth OR oauth (not both)
|
||||
auth:
|
||||
# Authentication mode: "basic", "multi-user-basic", or "oauth"
|
||||
# basic: Single-user with username/password (recommended for personal use)
|
||||
# multi-user-basic: Multi-user with BasicAuth pass-through (credentials in request headers)
|
||||
# Authentication mode: "basic" or "oauth"
|
||||
# basic: Uses username/password (recommended for most users)
|
||||
# oauth: Uses OAuth2/OIDC (experimental, requires patches)
|
||||
mode: basic
|
||||
|
||||
# Basic authentication settings (single-user mode)
|
||||
# Basic authentication settings
|
||||
basic:
|
||||
# Nextcloud username (ignored if existingSecret is set)
|
||||
username: ""
|
||||
@@ -66,47 +58,6 @@ auth:
|
||||
usernameKey: "username"
|
||||
passwordKey: "password"
|
||||
|
||||
# Multi-user BasicAuth settings (pass-through mode)
|
||||
# Users provide credentials in request headers (Authorization: Basic ...)
|
||||
# Server optionally stores app passwords for background operations
|
||||
multiUserBasic:
|
||||
# Enable offline access (background operations using app passwords via Astrolabe)
|
||||
# When enabled, requires token encryption key. OAuth client credentials are optional (uses DCR if not provided)
|
||||
enableOfflineAccess: false
|
||||
# Token encryption key (required if enableOfflineAccess: true, ignored if existingSecret is set)
|
||||
# Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
tokenEncryptionKey: ""
|
||||
# Token storage database path
|
||||
tokenStorageDb: "/app/data/tokens.db"
|
||||
# OAuth client credentials (optional - uses Dynamic Client Registration if not provided)
|
||||
# Only needed if enableOfflineAccess: true
|
||||
clientId: ""
|
||||
clientSecret: ""
|
||||
# OAuth scopes to request (space-separated)
|
||||
scopes: "openid profile email offline_access notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write todo:read todo:write"
|
||||
# Use existing secret for multi-user basic auth credentials
|
||||
# If set, tokenEncryptionKey, clientId, and clientSecret above are ignored
|
||||
# Secret should contain keys specified in the *Key fields below
|
||||
# Example:
|
||||
# kubectl create secret generic my-multiuser-creds \
|
||||
# --from-literal=token_encryption_key=ESF1BvEQ... \
|
||||
# --from-literal=client_id=my-client-id \
|
||||
# --from-literal=client_secret=my-client-secret
|
||||
existingSecret: ""
|
||||
# Keys in the existing secret
|
||||
tokenEncryptionKeyKey: "token_encryption_key"
|
||||
clientIdKey: "client_id"
|
||||
clientSecretKey: "client_secret"
|
||||
# Persistent storage for token database
|
||||
persistence:
|
||||
enabled: true
|
||||
# Storage class (leave empty for default)
|
||||
storageClass: ""
|
||||
accessMode: ReadWriteOnce
|
||||
size: 100Mi
|
||||
# Use existing PVC
|
||||
existingClaim: ""
|
||||
|
||||
# OAuth2/OIDC settings (experimental)
|
||||
oauth:
|
||||
# OAuth token type: "jwt" or "opaque"
|
||||
@@ -365,11 +316,10 @@ extraEnvFrom: []
|
||||
# - secretRef:
|
||||
# name: my-secret
|
||||
|
||||
# Semantic Search Configuration
|
||||
# Enable semantic search with BM25 hybrid search and background synchronization
|
||||
# of Nextcloud content into vector database
|
||||
semanticSearch:
|
||||
# Enable semantic search and background vector synchronization
|
||||
# Vector Sync Configuration
|
||||
# Background synchronization of Nextcloud content into vector database for semantic search
|
||||
vectorSync:
|
||||
# Enable background vector synchronization
|
||||
enabled: false
|
||||
# Scan interval in seconds (how often to check for changes)
|
||||
scanInterval: 3600
|
||||
@@ -380,7 +330,7 @@ semanticSearch:
|
||||
|
||||
# Document Chunking Configuration
|
||||
# Controls how documents are split into chunks before embedding
|
||||
# Only relevant when semanticSearch.enabled is true
|
||||
# Only relevant when vectorSync.enabled is true
|
||||
documentChunking:
|
||||
# Number of words per chunk (default: 512)
|
||||
# Smaller chunks (256-384): Better for precise searches, more chunks to store
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
# CI-specific overrides for RAG evaluation pipeline
|
||||
# This file is used by the rag-evaluation.yml workflow to configure the MCP
|
||||
# container with OpenAI/GitHub Models API for vector embeddings.
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f docker-compose.yml -f docker-compose.ci.yml up
|
||||
#
|
||||
# Environment variables (set in CI workflow):
|
||||
# OPENAI_API_KEY - API key for embeddings (GitHub Models uses GITHUB_TOKEN)
|
||||
# OPENAI_BASE_URL - API endpoint (e.g., https://models.github.ai/inference)
|
||||
# OPENAI_EMBEDDING_MODEL - Model name (e.g., openai/text-embedding-3-small)
|
||||
# OPENAI_GENERATION_MODEL - Model name for generation (e.g., openai/gpt-4o-mini)
|
||||
|
||||
services:
|
||||
mcp:
|
||||
environment:
|
||||
# OpenAI provider configuration (required for CI vector sync)
|
||||
- OPENAI_API_KEY=${OPENAI_API_KEY}
|
||||
- OPENAI_BASE_URL=${OPENAI_BASE_URL:-https://models.github.ai/inference}
|
||||
- OPENAI_EMBEDDING_MODEL=${OPENAI_EMBEDDING_MODEL:-openai/text-embedding-3-small}
|
||||
- OPENAI_GENERATION_MODEL=${OPENAI_GENERATION_MODEL:-openai/gpt-4o-mini}
|
||||
# Faster sync for CI
|
||||
- VECTOR_SYNC_SCAN_INTERVAL=${VECTOR_SYNC_SCAN_INTERVAL:-5}
|
||||
# Enable document processing for PDF parsing
|
||||
- ENABLE_DOCUMENT_PROCESSING=true
|
||||
+9
-82
@@ -17,11 +17,11 @@ services:
|
||||
# Note: Redis is an external service. You can find more information about the configuration here:
|
||||
# https://hub.docker.com/_/redis
|
||||
redis:
|
||||
image: docker.io/library/redis:alpine@sha256:6cbef353e480a8a6e7f10ec545f13d7d3fa85a212cdcc5ffaf5a1c818b9d3798
|
||||
image: docker.io/library/redis:alpine@sha256:5013e94192ef18a5d8368179c7522e5300f9265cc339cadac76c7b93303a2752
|
||||
restart: always
|
||||
|
||||
app:
|
||||
image: docker.io/library/nextcloud:32.0.3@sha256:53231a9fb9233af2c15bfe70fc03ebe639fd53243fa42a9369884b1e0008deae
|
||||
image: docker.io/library/nextcloud:32.0.1@sha256:d572839eeb693026d72a0c6aa48076df0bb8930797ea321e604936ef7189d06e
|
||||
restart: always
|
||||
ports:
|
||||
- 0.0.0.0:8080:80
|
||||
@@ -34,8 +34,7 @@ services:
|
||||
- ./app-hooks:/docker-entrypoint-hooks.d:ro
|
||||
# Mount OIDC development directory outside /var/www/html to avoid rsync conflicts
|
||||
# The post-installation hook will register /opt/apps as an additional app directory
|
||||
#- ./third_party:/opt/apps:ro
|
||||
- ./third_party/astrolabe:/opt/apps/astrolabe:ro
|
||||
- ./third_party:/opt/apps:ro
|
||||
environment:
|
||||
- NEXTCLOUD_TRUSTED_DOMAINS=app
|
||||
- NEXTCLOUD_ADMIN_USER=admin
|
||||
@@ -52,7 +51,7 @@ services:
|
||||
retries: 30
|
||||
|
||||
recipes:
|
||||
image: docker.io/library/nginx:alpine@sha256:052b75ab72f690f33debaa51c7e08d9b969a0447a133eb2b99cc905d9188cb2b
|
||||
image: docker.io/library/nginx:alpine@sha256:b3c656d55d7ad751196f21b7fd2e8d4da9cb430e32f646adcf92441b72f82b14
|
||||
restart: always
|
||||
volumes:
|
||||
- ./tests/fixtures/test_recipe.html:/usr/share/nginx/html/test_recipe.html:ro
|
||||
@@ -87,7 +86,7 @@ services:
|
||||
- NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080
|
||||
|
||||
# Vector sync configuration (ADR-007)
|
||||
#- VECTOR_SYNC_ENABLED=true
|
||||
- VECTOR_SYNC_ENABLED=true
|
||||
- VECTOR_SYNC_SCAN_INTERVAL=60
|
||||
- VECTOR_SYNC_PROCESSOR_WORKERS=1
|
||||
|
||||
@@ -123,41 +122,6 @@ services:
|
||||
# - DOCUMENT_CHUNK_SIZE=512 # Words per chunk (default: 512)
|
||||
# - DOCUMENT_CHUNK_OVERLAP=50 # Overlapping words (default: 50, recommended: 10-20% of chunk size)
|
||||
|
||||
mcp-multi-user-basic:
|
||||
build: .
|
||||
restart: always
|
||||
command: ["--transport", "streamable-http"]
|
||||
depends_on:
|
||||
app:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- 127.0.0.1:8003:8000
|
||||
environment:
|
||||
# Multi-user BasicAuth pass-through mode (ADR-020)
|
||||
- NEXTCLOUD_HOST=http://app:80
|
||||
- NEXTCLOUD_MCP_SERVER_URL=http://localhost:8003
|
||||
- NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8080
|
||||
- ENABLE_MULTI_USER_BASIC_AUTH=true
|
||||
#- ENABLE_OFFLINE_ACCESS=true
|
||||
- ENABLE_BACKGROUND_OPERATIONS=true
|
||||
|
||||
# Token storage (required for middleware initialization)
|
||||
- TOKEN_ENCRYPTION_KEY=ESF1BvEQdGYsCluwMx9Cxvw3uh5pFowPH7Rg_nIliyo=
|
||||
- TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
- VECTOR_SYNC_ENABLED=true
|
||||
- VECTOR_SYNC_SCAN_INTERVAL=60
|
||||
- VECTOR_SYNC_PROCESSOR_WORKERS=1
|
||||
|
||||
# OAuth credentials for background sync (optional - uses DCR if not provided)
|
||||
# Uncomment to avoid DCR:
|
||||
# - NEXTCLOUD_OIDC_CLIENT_ID=your_client_id
|
||||
# - NEXTCLOUD_OIDC_CLIENT_SECRET=your_client_secret
|
||||
|
||||
# NO admin credentials - credentials come from client Authorization header
|
||||
volumes:
|
||||
- multi-user-basic-data:/app/data
|
||||
|
||||
mcp-oauth:
|
||||
build: .
|
||||
command: ["--transport", "streamable-http", "--oauth", "--port", "8001", "--oauth-token-type", "jwt"]
|
||||
@@ -178,8 +142,7 @@ services:
|
||||
- NEXTCLOUD_OIDC_SCOPES=openid profile email notes:read notes:write calendar:read calendar:write contacts:read contacts:write cookbook:read cookbook:write deck:read deck:write tables:read tables:write files:read files:write sharing:read sharing:write todo:read todo:write
|
||||
|
||||
# Refresh token storage (ADR-002 Tier 1)
|
||||
#- ENABLE_OFFLINE_ACCESS=true
|
||||
- ENABLE_BACKGROUND_OPERATIONS=true
|
||||
- ENABLE_OFFLINE_ACCESS=true
|
||||
- TOKEN_ENCRYPTION_KEY=ESF1BvEQdGYsCluwMx9Cxvw3uh5pFowPH7Rg_nIliyo=
|
||||
- TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
@@ -187,20 +150,6 @@ services:
|
||||
# Tokens must contain BOTH MCP and Nextcloud audiences
|
||||
# No token exchange needed - tokens work for both MCP auth and Nextcloud APIs
|
||||
|
||||
# Vector sync configuration (ADR-007)
|
||||
- ENABLE_SEMANTIC_SEARCH=true
|
||||
#- VECTOR_SYNC_ENABLED=true
|
||||
- VECTOR_SYNC_SCAN_INTERVAL=60
|
||||
- VECTOR_SYNC_PROCESSOR_WORKERS=1
|
||||
|
||||
# Qdrant configuration - persistent local storage
|
||||
- QDRANT_LOCATION=/app/data/qdrant
|
||||
|
||||
# Embedding provider for vector sync (use Simple provider as fallback)
|
||||
# Ollama not available in CI/test environments
|
||||
# - OLLAMA_BASE_URL=http://ollama:11434
|
||||
# - OLLAMA_EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# NO admin credentials - using OAuth with Dynamic Client Registration (DCR)
|
||||
# Client credentials registered via RFC 7591 and stored in volume
|
||||
# JWT token type is used for testing (faster validation, scopes embedded in token)
|
||||
@@ -209,7 +158,7 @@ services:
|
||||
- oauth-tokens:/app/data
|
||||
|
||||
keycloak:
|
||||
image: quay.io/keycloak/keycloak:26.4.7@sha256:9409c59bdfb65dbffa20b11e6f18b8abb9281d480c7ca402f51ed3d5977e6007
|
||||
image: quay.io/keycloak/keycloak:26.4.5@sha256:653852bfdea2be6e958b9e90a976eff1c6de34edd55f2f679bdc48ef16bc528e
|
||||
command:
|
||||
- "start-dev"
|
||||
- "--import-realm"
|
||||
@@ -257,8 +206,7 @@ services:
|
||||
- NEXTCLOUD_PUBLIC_ISSUER_URL=http://localhost:8888/realms/nextcloud-mcp
|
||||
|
||||
# Refresh token storage (ADR-002 Tier 1 & 2)
|
||||
#- ENABLE_OFFLINE_ACCESS=true
|
||||
- ENABLE_BACKGROUND_OPERATIONS=true
|
||||
- ENABLE_OFFLINE_ACCESS=true
|
||||
- TOKEN_ENCRYPTION_KEY=ESF1BvEQdGYsCluwMx9Cxvw3uh5pFowPH7Rg_nIliyo=
|
||||
- TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
@@ -276,28 +224,8 @@ services:
|
||||
- keycloak-tokens:/app/data
|
||||
- keycloak-oauth-storage:/app/.oauth
|
||||
|
||||
# Smithery stateless deployment mode (ADR-016)
|
||||
# Test with: docker compose --profile smithery up smithery
|
||||
# Then: curl http://localhost:8081/.well-known/mcp-config
|
||||
smithery:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: Dockerfile.smithery
|
||||
restart: always
|
||||
depends_on:
|
||||
app:
|
||||
condition: service_healthy
|
||||
ports:
|
||||
- 127.0.0.1:8081:8081
|
||||
environment:
|
||||
- SMITHERY_DEPLOYMENT=true
|
||||
- VECTOR_SYNC_ENABLED=false
|
||||
- PORT=8081
|
||||
profiles:
|
||||
- smithery
|
||||
|
||||
qdrant:
|
||||
image: qdrant/qdrant:v1.16.2@sha256:dab6de32f7b2cc599985a7c764db3e8b062f70508fb85ca074aa856f829bf335
|
||||
image: qdrant/qdrant:v1.16.0@sha256:1005201498cf927d835383d0f918b17d8c9da7db58550f169f694455e42d78f4
|
||||
restart: always
|
||||
ports:
|
||||
- 127.0.0.1:6333:6333 # REST API
|
||||
@@ -323,4 +251,3 @@ volumes:
|
||||
keycloak-oauth-storage:
|
||||
qdrant-data:
|
||||
mcp-data:
|
||||
multi-user-basic-data:
|
||||
|
||||
@@ -1,492 +0,0 @@
|
||||
# ADR-016: Smithery Stateless Deployment for Multi-User Public Nextcloud Instances
|
||||
|
||||
**Status:** Proposed
|
||||
**Date:** 2025-01-22
|
||||
**Deciders:** Development Team
|
||||
**Related:** ADR-004 (OAuth), ADR-007 (Background Vector Sync), ADR-015 (Unified Provider)
|
||||
|
||||
## Context
|
||||
|
||||
[Smithery](https://smithery.ai) is a hosting platform and marketplace for MCP servers that provides:
|
||||
|
||||
- **Discovery**: Marketplace listing for MCP servers
|
||||
- **Hosting**: Containerized deployment with auto-scaling
|
||||
- **Authentication UI**: OAuth flow presentation for users
|
||||
- **Session Configuration**: Per-user settings passed via URL parameters
|
||||
- **Observability**: Usage logs and monitoring
|
||||
|
||||
### Current Architecture Limitations
|
||||
|
||||
The current nextcloud-mcp-server architecture assumes a **self-hosted deployment** with:
|
||||
|
||||
1. **Persistent Infrastructure**
|
||||
- Qdrant vector database for semantic search
|
||||
- Background sync worker for content indexing
|
||||
- Refresh token storage for offline access
|
||||
|
||||
2. **Single-Tenant Configuration**
|
||||
- Environment variables configure one Nextcloud instance
|
||||
- `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`
|
||||
- Or OAuth with a single IdP
|
||||
|
||||
3. **Stateful Operations**
|
||||
- Vector sync maintains index state across requests
|
||||
- Token storage persists between sessions
|
||||
|
||||
### Smithery Hosting Constraints
|
||||
|
||||
Smithery-hosted containers are **stateless by design**:
|
||||
|
||||
- No persistent storage between requests
|
||||
- No background workers or cron jobs
|
||||
- No databases (Qdrant, Redis, etc.)
|
||||
- Containers may be recycled at any time
|
||||
- Configuration passed per-session via URL parameters
|
||||
|
||||
### Opportunity
|
||||
|
||||
Many users have **publicly accessible Nextcloud instances** and want to:
|
||||
|
||||
1. Try the MCP server without self-hosting infrastructure
|
||||
2. Connect multiple users to different Nextcloud instances
|
||||
3. Use basic Nextcloud tools without semantic search
|
||||
4. Benefit from Smithery's discovery and OAuth UI
|
||||
|
||||
## Decision
|
||||
|
||||
Implement a **stateless deployment mode** for Smithery that:
|
||||
|
||||
1. **Disables stateful features** (vector sync, semantic search)
|
||||
2. **Creates clients per-session** from Smithery configuration
|
||||
3. **Supports multiple Nextcloud instances** via session config
|
||||
4. **Provides a useful subset of tools** that work without infrastructure
|
||||
|
||||
### Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ Smithery-Hosted Stateless Mode │
|
||||
├─────────────────────────────────────────────────────────────────────────┤
|
||||
│ │
|
||||
│ MCP Client Smithery │
|
||||
│ (Cursor, Claude) Infrastructure │
|
||||
│ │ │ │
|
||||
│ │ 1. Connect │ │
|
||||
│ ├───────────────────────────►│ │
|
||||
│ │ │ │
|
||||
│ │ 2. Config UI │ │
|
||||
│ │◄───────────────────────────┤ User enters: │
|
||||
│ │ (Smithery presents) │ - nextcloud_url │
|
||||
│ │ │ - auth_mode (basic/oauth) │
|
||||
│ │ │ - credentials │
|
||||
│ │ 3. Tool call │ │
|
||||
│ ├───────────────────────────►│ │
|
||||
│ │ + session config │ │
|
||||
│ │ │ │
|
||||
│ │ ┌───────┴───────┐ │
|
||||
│ │ │ MCP Server │ │
|
||||
│ │ │ Container │ │
|
||||
│ │ │ │ │
|
||||
│ │ │ 4. Create │ │
|
||||
│ │ │ client │ │
|
||||
│ │ │ from │ │
|
||||
│ │ │ config │ │
|
||||
│ │ │ │ │ │
|
||||
│ │ │ ▼ │ │
|
||||
│ │ │ 5. Call │ │
|
||||
│ │ │ Nextcloud │───────► User's Nextcloud │
|
||||
│ │ │ API │ Instance │
|
||||
│ │ │ │ │ │
|
||||
│ │ │ ▼ │ │
|
||||
│ │ 6. Response │ Return result │ │
|
||||
│ │◄───────────────────┤ │ │
|
||||
│ │ └───────────────┘ │
|
||||
│ │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Session Configuration Schema
|
||||
|
||||
```python
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
class SmitheryConfigSchema(BaseModel):
|
||||
"""Configuration schema for Smithery session."""
|
||||
|
||||
# Required: Nextcloud instance
|
||||
nextcloud_url: str = Field(
|
||||
...,
|
||||
description="Your Nextcloud instance URL (e.g., https://cloud.example.com)"
|
||||
)
|
||||
|
||||
# Authentication mode
|
||||
auth_mode: str = Field(
|
||||
"app_password",
|
||||
description="Authentication method: 'app_password' or 'oauth'"
|
||||
)
|
||||
|
||||
# App Password authentication (recommended for Smithery)
|
||||
username: str | None = Field(
|
||||
None,
|
||||
description="Nextcloud username (required for app_password auth)"
|
||||
)
|
||||
app_password: str | None = Field(
|
||||
None,
|
||||
description="Nextcloud app password (Settings → Security → App passwords)"
|
||||
)
|
||||
|
||||
# OAuth authentication (advanced)
|
||||
# When auth_mode='oauth', Smithery handles the OAuth flow
|
||||
# and passes the access token automatically
|
||||
```
|
||||
|
||||
### Feature Matrix
|
||||
|
||||
| Feature | Self-Hosted | Smithery Stateless |
|
||||
|---------|-------------|-------------------|
|
||||
| **Notes** | | |
|
||||
| List/Search notes | ✓ | ✓ |
|
||||
| Get/Create/Update notes | ✓ | ✓ |
|
||||
| Semantic search | ✓ | ✗ |
|
||||
| **Calendar** | | |
|
||||
| List calendars | ✓ | ✓ |
|
||||
| Get/Create events | ✓ | ✓ |
|
||||
| **Contacts** | | |
|
||||
| List address books | ✓ | ✓ |
|
||||
| Search/Get contacts | ✓ | ✓ |
|
||||
| **Files (WebDAV)** | | |
|
||||
| List/Download files | ✓ | ✓ |
|
||||
| Upload files | ✓ | ✓ |
|
||||
| Search files | ✓ | ✓ (keyword only) |
|
||||
| **Deck** | | |
|
||||
| List boards/cards | ✓ | ✓ |
|
||||
| Create/Update cards | ✓ | ✓ |
|
||||
| **Tables** | | |
|
||||
| List/Query tables | ✓ | ✓ |
|
||||
| Create/Update rows | ✓ | ✓ |
|
||||
| **Cookbook** | | |
|
||||
| List/Get recipes | ✓ | ✓ |
|
||||
| **Semantic Search** | | |
|
||||
| Vector search | ✓ | ✗ |
|
||||
| RAG answers | ✓ | ✗ |
|
||||
| **Background Sync** | | |
|
||||
| Auto-indexing | ✓ | ✗ |
|
||||
| Webhook sync | ✓ | ✗ |
|
||||
| **Admin UI (`/app`)** | | |
|
||||
| Vector sync status | ✓ | ✗ |
|
||||
| Vector visualization | ✓ | ✗ |
|
||||
| Webhook management | ✓ | ✗ |
|
||||
| Session management | ✓ | ✗ |
|
||||
|
||||
### Implementation
|
||||
|
||||
#### 1. Deployment Mode Detection
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/config.py
|
||||
|
||||
class DeploymentMode(Enum):
|
||||
SELF_HOSTED = "self_hosted" # Full features, env-based config
|
||||
SMITHERY_STATELESS = "smithery" # Stateless, session-based config
|
||||
|
||||
def get_deployment_mode() -> DeploymentMode:
|
||||
"""Detect deployment mode from environment."""
|
||||
if os.getenv("SMITHERY_DEPLOYMENT") == "true":
|
||||
return DeploymentMode.SMITHERY_STATELESS
|
||||
return DeploymentMode.SELF_HOSTED
|
||||
```
|
||||
|
||||
#### 2. Session-Based Client Factory
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/context.py
|
||||
|
||||
async def get_client(ctx: Context) -> NextcloudClient:
|
||||
"""Get NextcloudClient - from session config or environment."""
|
||||
|
||||
mode = get_deployment_mode()
|
||||
|
||||
if mode == DeploymentMode.SMITHERY_STATELESS:
|
||||
# Create client from Smithery session config
|
||||
config = ctx.session_config
|
||||
if not config:
|
||||
raise McpError("Session configuration required")
|
||||
|
||||
return NextcloudClient(
|
||||
base_url=config.nextcloud_url,
|
||||
username=config.username,
|
||||
password=config.app_password,
|
||||
)
|
||||
else:
|
||||
# Existing behavior: from environment or OAuth context
|
||||
return await _get_client_from_context(ctx)
|
||||
```
|
||||
|
||||
#### 3. Conditional Tool Registration
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/app.py
|
||||
|
||||
def create_mcp_server(mode: DeploymentMode) -> FastMCP:
|
||||
"""Create MCP server with mode-appropriate tools."""
|
||||
|
||||
mcp = FastMCP("Nextcloud MCP")
|
||||
|
||||
# Always register core tools
|
||||
configure_notes_tools(mcp)
|
||||
configure_calendar_tools(mcp)
|
||||
configure_contacts_tools(mcp)
|
||||
configure_webdav_tools(mcp)
|
||||
configure_deck_tools(mcp)
|
||||
configure_tables_tools(mcp)
|
||||
configure_cookbook_tools(mcp)
|
||||
|
||||
# Only register stateful tools in self-hosted mode
|
||||
if mode == DeploymentMode.SELF_HOSTED:
|
||||
configure_semantic_tools(mcp) # Requires Qdrant
|
||||
register_oauth_tools(mcp) # Requires token storage
|
||||
|
||||
return mcp
|
||||
```
|
||||
|
||||
#### 4. Exclude Admin UI Routes
|
||||
|
||||
The `/app` admin UI should **not be installed** in Smithery mode because:
|
||||
|
||||
- **Vector sync status** - No vector sync in stateless mode
|
||||
- **Vector visualization** - No Qdrant to visualize
|
||||
- **Webhook management** - No webhook sync without background workers
|
||||
- **Session management** - No persistent sessions to manage
|
||||
|
||||
```python
|
||||
# nextcloud_mcp_server/app.py
|
||||
|
||||
def create_app(mode: DeploymentMode) -> Starlette:
|
||||
"""Create Starlette app with mode-appropriate routes."""
|
||||
|
||||
routes = [
|
||||
Route("/health/live", health_live, methods=["GET"]),
|
||||
Route("/health/ready", health_ready, methods=["GET"]),
|
||||
]
|
||||
|
||||
# Only mount admin UI in self-hosted mode
|
||||
if mode == DeploymentMode.SELF_HOSTED:
|
||||
browser_app = create_browser_app()
|
||||
routes.append(
|
||||
Route("/app", lambda r: RedirectResponse("/app/", status_code=307))
|
||||
)
|
||||
routes.append(Mount("/app", app=browser_app))
|
||||
logger.info("Admin UI mounted at /app")
|
||||
else:
|
||||
logger.info("Admin UI disabled in Smithery stateless mode")
|
||||
|
||||
# Mount FastMCP at root
|
||||
mcp_app = create_mcp_server(mode).streamable_http_app()
|
||||
routes.append(Mount("/", app=mcp_app))
|
||||
|
||||
return Starlette(routes=routes, lifespan=starlette_lifespan)
|
||||
```
|
||||
|
||||
**Endpoints by Mode:**
|
||||
|
||||
| Endpoint | Self-Hosted | Smithery |
|
||||
|----------|-------------|----------|
|
||||
| `/mcp` | ✓ | ✓ |
|
||||
| `/health/live` | ✓ | ✓ |
|
||||
| `/health/ready` | ✓ | ✓ |
|
||||
| `/.well-known/mcp-config` | ✓ | ✓ |
|
||||
| `/app` | ✓ | ✗ |
|
||||
| `/app/vector-sync/status` | ✓ | ✗ |
|
||||
| `/app/vector-viz` | ✓ | ✗ |
|
||||
| `/app/webhooks` | ✓ | ✗ |
|
||||
|
||||
#### 5. Smithery Integration Files
|
||||
|
||||
**smithery.yaml:**
|
||||
```yaml
|
||||
runtime: "container"
|
||||
build:
|
||||
dockerfile: "Dockerfile.smithery"
|
||||
dockerBuildPath: "."
|
||||
startCommand:
|
||||
type: "http"
|
||||
configSchema:
|
||||
type: "object"
|
||||
required: ["nextcloud_url", "username", "app_password"]
|
||||
properties:
|
||||
nextcloud_url:
|
||||
type: "string"
|
||||
title: "Nextcloud URL"
|
||||
description: "Your Nextcloud instance URL (e.g., https://cloud.example.com)"
|
||||
username:
|
||||
type: "string"
|
||||
title: "Username"
|
||||
description: "Your Nextcloud username"
|
||||
app_password:
|
||||
type: "string"
|
||||
title: "App Password"
|
||||
description: "Generate at Settings → Security → App passwords"
|
||||
exampleConfig:
|
||||
nextcloud_url: "https://cloud.example.com"
|
||||
username: "alice"
|
||||
app_password: "xxxxx-xxxxx-xxxxx-xxxxx-xxxxx"
|
||||
```
|
||||
|
||||
**Dockerfile.smithery:**
|
||||
```dockerfile
|
||||
FROM python:3.11-slim
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install uv
|
||||
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
|
||||
|
||||
# Copy project files
|
||||
COPY pyproject.toml uv.lock ./
|
||||
COPY nextcloud_mcp_server ./nextcloud_mcp_server
|
||||
|
||||
# Install dependencies (without vector/semantic extras)
|
||||
RUN uv sync --frozen --no-dev
|
||||
|
||||
# Set Smithery mode
|
||||
ENV SMITHERY_DEPLOYMENT=true
|
||||
ENV VECTOR_SYNC_ENABLED=false
|
||||
|
||||
# Smithery sets PORT=8081
|
||||
EXPOSE 8081
|
||||
|
||||
CMD ["uv", "run", "python", "-m", "nextcloud_mcp_server.smithery_main"]
|
||||
```
|
||||
|
||||
**nextcloud_mcp_server/smithery_main.py:**
|
||||
```python
|
||||
"""Smithery-specific entrypoint for stateless deployment."""
|
||||
|
||||
import os
|
||||
import uvicorn
|
||||
from starlette.middleware.cors import CORSMiddleware
|
||||
|
||||
from nextcloud_mcp_server.app import create_mcp_server
|
||||
from nextcloud_mcp_server.config import DeploymentMode
|
||||
|
||||
def main():
|
||||
# Force stateless mode
|
||||
os.environ["SMITHERY_DEPLOYMENT"] = "true"
|
||||
os.environ["VECTOR_SYNC_ENABLED"] = "false"
|
||||
|
||||
mcp = create_mcp_server(DeploymentMode.SMITHERY_STATELESS)
|
||||
app = mcp.streamable_http_app()
|
||||
|
||||
# Add CORS for browser-based clients
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=["*"],
|
||||
allow_credentials=True,
|
||||
allow_methods=["GET", "POST", "OPTIONS"],
|
||||
allow_headers=["*"],
|
||||
expose_headers=["mcp-session-id", "mcp-protocol-version"],
|
||||
)
|
||||
|
||||
# Smithery sets PORT environment variable
|
||||
port = int(os.environ.get("PORT", 8081))
|
||||
uvicorn.run(app, host="0.0.0.0", port=port)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
```
|
||||
|
||||
### Security Considerations
|
||||
|
||||
1. **App Passwords over User Passwords**
|
||||
- Smithery config encourages app passwords (revocable, scoped)
|
||||
- Documentation guides users to create dedicated app passwords
|
||||
- App passwords can be revoked without changing main password
|
||||
|
||||
2. **HTTPS Required**
|
||||
- `nextcloud_url` must be HTTPS for production use
|
||||
- Validation rejects HTTP URLs in Smithery mode
|
||||
|
||||
3. **No Credential Storage**
|
||||
- Credentials exist only for request duration
|
||||
- No server-side persistence of user credentials
|
||||
- Smithery handles secure config transmission
|
||||
|
||||
4. **Scope Limitation**
|
||||
- Stateless mode cannot access offline_access
|
||||
- No background operations on user's behalf
|
||||
- Clear user expectation: tools work during session only
|
||||
|
||||
### Migration Path
|
||||
|
||||
Users can start with Smithery stateless mode and migrate to self-hosted:
|
||||
|
||||
1. **Try on Smithery** → Basic tools, no setup
|
||||
2. **Self-host for semantic search** → Add Qdrant, enable vector sync
|
||||
3. **Full deployment** → Background sync, webhooks, multi-user OAuth
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
1. **Lower barrier to entry** - Users can try without infrastructure
|
||||
2. **Multi-user support** - Each session connects to different Nextcloud
|
||||
3. **Smithery ecosystem** - Discovery, observability, OAuth UI
|
||||
4. **Clear feature tiers** - Stateless (simple) vs self-hosted (full)
|
||||
|
||||
### Negative
|
||||
|
||||
1. **No semantic search** - Key differentiator unavailable on Smithery
|
||||
2. **Per-request auth** - Credentials sent with each request
|
||||
3. **No offline access** - Cannot perform background operations
|
||||
4. **Maintenance burden** - Two deployment modes to support
|
||||
|
||||
### Neutral
|
||||
|
||||
1. **Feature subset** - May encourage users to self-host for full features
|
||||
2. **Documentation needs** - Clear guidance on mode differences required
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### 1. External MCP Only
|
||||
|
||||
**Approach:** Only support self-hosted external MCP registration on Smithery.
|
||||
|
||||
**Rejected because:**
|
||||
- Higher barrier to entry for new users
|
||||
- Misses opportunity for Smithery marketplace visibility
|
||||
- Users want to try before committing to infrastructure
|
||||
|
||||
### 2. Embedded Vector DB (SQLite-vec)
|
||||
|
||||
**Approach:** Use SQLite with vector extensions for per-request indexing.
|
||||
|
||||
**Rejected because:**
|
||||
- No persistence between requests anyway
|
||||
- Indexing latency too high for synchronous requests
|
||||
- Complexity without benefit in stateless context
|
||||
|
||||
### 3. External Vector DB Service
|
||||
|
||||
**Approach:** Connect to Pinecone/Weaviate Cloud from Smithery container.
|
||||
|
||||
**Rejected because:**
|
||||
- Adds external dependency and cost
|
||||
- Per-user collections require complex multi-tenancy
|
||||
- Sync still impossible without background workers
|
||||
|
||||
### 4. Hybrid: Smithery + User's Qdrant
|
||||
|
||||
**Approach:** User provides their own Qdrant URL in session config.
|
||||
|
||||
**Considered for future:**
|
||||
- Could enable semantic search for advanced users
|
||||
- Adds complexity to session config
|
||||
- Sync still requires external trigger (manual or webhook)
|
||||
|
||||
## References
|
||||
|
||||
- [Smithery Documentation](https://smithery.ai/docs)
|
||||
- [Smithery Session Configuration](https://smithery.ai/docs/build/session-config)
|
||||
- [Smithery External MCPs](https://smithery.ai/docs/build/external)
|
||||
- [MCP Streamable HTTP Transport](https://modelcontextprotocol.io/docs/concepts/transports)
|
||||
- [Nextcloud App Passwords](https://docs.nextcloud.com/server/latest/user_manual/en/session_management.html#app-passwords)
|
||||
@@ -1,506 +0,0 @@
|
||||
# ADR-017: Add MCP Tool Annotations for Enhanced Client UX
|
||||
|
||||
## Status
|
||||
|
||||
Implemented
|
||||
|
||||
## Context
|
||||
|
||||
The MCP Python SDK supports tool annotations that provide behavioral hints and improved UX to MCP clients. Currently, our 101 tools across 10 modules lack these annotations, resulting in:
|
||||
|
||||
- Snake_case function names displayed to users (e.g., "nc_notes_create_note" instead of "Create Note")
|
||||
- No behavioral hints for clients about read-only, destructive, or idempotent operations
|
||||
- Missing parameter descriptions for better auto-completion and inline help
|
||||
- Clients cannot optimize caching, warn before destructive operations, or retry safely
|
||||
|
||||
### Available MCP Annotations
|
||||
|
||||
The MCP SDK provides three types of annotations:
|
||||
|
||||
#### 1. Tool Decorator Parameters
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Human-Readable Name",
|
||||
description="Tool description", # Can also come from docstring
|
||||
annotations=ToolAnnotations(...),
|
||||
icons=[Icon(...)] # Optional visual icons
|
||||
)
|
||||
```
|
||||
|
||||
#### 2. ToolAnnotations Behavioral Hints
|
||||
```python
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
ToolAnnotations(
|
||||
title="Alternative Title", # Decorator title takes precedence
|
||||
readOnlyHint=True, # Tool doesn't modify data
|
||||
destructiveHint=True, # Tool may delete/overwrite data
|
||||
idempotentHint=True, # Repeated calls with same args are safe
|
||||
openWorldHint=True # Interacts with external entities
|
||||
)
|
||||
```
|
||||
|
||||
#### 3. Parameter Descriptions
|
||||
```python
|
||||
from pydantic import Field
|
||||
|
||||
async def tool(
|
||||
param: str = Field(description="What this parameter does"),
|
||||
ctx: Context
|
||||
):
|
||||
```
|
||||
|
||||
### Idempotency Analysis
|
||||
|
||||
**Important**: Idempotency means calling with **the same inputs** produces the same result.
|
||||
|
||||
**NOT Idempotent** (different inputs each call):
|
||||
- **Updates with etag**: `update_note(id=1, title="X", etag="abc")` → etag changes to "def"
|
||||
- Second call: `update_note(id=1, title="X", etag="abc")` → fails (etag mismatch)
|
||||
- Different input (stale etag) → different result (error)
|
||||
- **Creates**: `create_note(title="X")` → creates note 1
|
||||
- Second call → creates note 2 (different result)
|
||||
- **Append operations**: `append_content(id=1, text="X")` → adds X once
|
||||
- Second call → adds X again (different result)
|
||||
|
||||
**Idempotent**:
|
||||
- **Deletes**: `delete_note(id=1)` → note deleted
|
||||
- Second call → 404 or success (same end state: note doesn't exist)
|
||||
- Note: May return different status code, but end state is identical
|
||||
- **Full resource PUT without version control**: `write_file(path="/test.txt", content="Hello")` → file has "Hello"
|
||||
- Second call → file still has "Hello" (same end state)
|
||||
- Example: `nc_webdav_write_file` uses HTTP PUT without etags/version control
|
||||
- **Set operations**: `set_property(id=1, value="X")` → property = X
|
||||
- Second call → property still = X (same result)
|
||||
- Note: Nextcloud updates with etags use version control, so not idempotent
|
||||
|
||||
**Read-Only** (always idempotent, never destructive):
|
||||
- All list, search, get operations
|
||||
|
||||
## Decision
|
||||
|
||||
Add annotations to all 101 tools in three phases:
|
||||
|
||||
### Phase 1: Titles (Quick Win)
|
||||
Add human-readable titles to all tools:
|
||||
|
||||
```python
|
||||
@mcp.tool(title="Create Note")
|
||||
async def nc_notes_create_note(...):
|
||||
```
|
||||
|
||||
**Effort**: 2-3 hours
|
||||
**Impact**: Immediate UX improvement
|
||||
|
||||
### Phase 2: ToolAnnotations (Behavioral Hints)
|
||||
Add annotations based on corrected categorization:
|
||||
|
||||
```python
|
||||
# Read-only tools
|
||||
@mcp.tool(
|
||||
title="Search Notes",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=True # Nextcloud is external to MCP server
|
||||
)
|
||||
)
|
||||
|
||||
# Delete tools (idempotent: same end state)
|
||||
@mcp.tool(
|
||||
title="Delete Note",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True,
|
||||
idempotentHint=True, # Deleting deleted item = same end state
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
|
||||
# Create tools (not idempotent: creates multiple items)
|
||||
@mcp.tool(
|
||||
title="Create Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False,
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
|
||||
# Update tools with etag (not idempotent: etag changes)
|
||||
@mcp.tool(
|
||||
title="Update Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # Etag required = different inputs each time
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
|
||||
# Append operations (not idempotent: adds content each time)
|
||||
@mcp.tool(
|
||||
title="Append to Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False,
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
```
|
||||
|
||||
**Effort**: 4-6 hours
|
||||
**Impact**: Better client behavior (caching, warnings, retry logic)
|
||||
|
||||
### Phase 3: Parameter Descriptions
|
||||
Add Field() descriptions to parameters:
|
||||
|
||||
```python
|
||||
from pydantic import Field
|
||||
|
||||
@mcp.tool(title="Create Note", annotations=ToolAnnotations(idempotentHint=False))
|
||||
async def nc_notes_create_note(
|
||||
title: str = Field(description="The title of the note"),
|
||||
content: str = Field(description="Markdown content of the note"),
|
||||
category: str = Field(description="Category or folder name for organizing"),
|
||||
ctx: Context
|
||||
) -> CreateNoteResponse:
|
||||
```
|
||||
|
||||
**Effort**: 6-8 hours
|
||||
**Impact**: Better auto-completion and inline help
|
||||
|
||||
## Tool Categorization
|
||||
|
||||
### Read-Only Tools (~40 tools)
|
||||
**Pattern**: List, search, get operations
|
||||
**Annotations**: `readOnlyHint=True`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_search_notes` → "Search Notes"
|
||||
- `nc_webdav_list_directory` → "List Files and Directories"
|
||||
- `nc_calendar_list_calendars` → "List Calendars"
|
||||
- `nc_contacts_get_contact` → "Get Contact"
|
||||
- `nc_semantic_search` → "Semantic Search"
|
||||
- `check_logged_in` → "Check Server Login Status"
|
||||
|
||||
### Create Tools (~20 tools)
|
||||
**Pattern**: Create new resources
|
||||
**Annotations**: `idempotentHint=False`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_create_note` → "Create Note"
|
||||
- `nc_calendar_create_event` → "Create Calendar Event"
|
||||
- `nc_contacts_create_contact` → "Create Contact"
|
||||
- `deck_create_card` → "Create Kanban Card"
|
||||
- `nc_tables_create_row` → "Create Table Row"
|
||||
|
||||
### Update Tools (~25 tools)
|
||||
**Pattern**: Modify existing resources with etag
|
||||
**Annotations**: `idempotentHint=False` (etag changes), `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_update_note` → "Update Note"
|
||||
- `nc_calendar_update_event` → "Update Calendar Event"
|
||||
- `nc_contacts_update_contact` → "Update Contact"
|
||||
- `deck_update_card` → "Update Kanban Card"
|
||||
|
||||
**Rationale**: Updates require etag, which changes after each update. Same parameters on second call will fail due to stale etag = NOT idempotent.
|
||||
|
||||
### Append/Accumulate Tools (~5 tools)
|
||||
**Pattern**: Add content without replacing
|
||||
**Annotations**: `idempotentHint=False`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_append_content` → "Append to Note"
|
||||
|
||||
**Rationale**: Each call adds content, changing the result = NOT idempotent.
|
||||
|
||||
### Delete Tools (~10 tools)
|
||||
**Pattern**: Remove resources
|
||||
**Annotations**: `destructiveHint=True`, `idempotentHint=True`, `openWorldHint=True`
|
||||
|
||||
Examples:
|
||||
- `nc_notes_delete_note` → "Delete Note"
|
||||
- `nc_webdav_delete_resource` → "Delete File or Directory"
|
||||
- `nc_calendar_delete_event` → "Delete Calendar Event"
|
||||
- `nc_contacts_delete_contact` → "Delete Contact"
|
||||
|
||||
**Rationale**: Deleting already-deleted item results in same end state (item doesn't exist) = idempotent. Status code may differ, but outcome is identical.
|
||||
|
||||
### Special Cases
|
||||
|
||||
#### OAuth Provisioning Tools
|
||||
```python
|
||||
# Not read-only but requires user interaction
|
||||
@mcp.tool(
|
||||
title="Grant Server Access to Nextcloud",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=False,
|
||||
idempotentHint=False, # Creates new OAuth session each time
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
async def provision_nextcloud_access(ctx: Context):
|
||||
```
|
||||
|
||||
#### Semantic Search (Closed World)
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Semantic Search",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True,
|
||||
openWorldHint=False # Searches only indexed Nextcloud data
|
||||
)
|
||||
)
|
||||
async def nc_semantic_search(query: str, ctx: Context):
|
||||
```
|
||||
|
||||
**Rationale**: Semantic search only queries pre-indexed Nextcloud content, not the "open world" like web search would.
|
||||
|
||||
## Tool Priority Matrix
|
||||
|
||||
### Critical Priority (~2 tools)
|
||||
OAuth tools required for server functionality:
|
||||
- `provision_nextcloud_access` → "Grant Server Access to Nextcloud"
|
||||
- `check_logged_in` → "Check Server Login Status"
|
||||
|
||||
### High Priority (~50 tools)
|
||||
Most commonly used modules:
|
||||
- **Notes** (14 tools): Create, read, update, delete notes
|
||||
- **WebDAV** (13 tools): File operations
|
||||
- **Calendar** (15 tools): Events and todos
|
||||
- **Semantic Search** (6 tools): AI-powered search
|
||||
- **Contacts** (9 tools): Address book operations
|
||||
|
||||
### Medium Priority (~35 tools)
|
||||
Secondary functionality:
|
||||
- **Deck** (9 tools): Kanban boards
|
||||
- **Tables** (7 tools): Structured data
|
||||
- **Sharing** (5 tools): File sharing
|
||||
|
||||
### Low Priority (~14 tools)
|
||||
Less frequently used:
|
||||
- **Cookbook** (8 tools): Recipe management
|
||||
- **News** (6 tools): RSS feeds
|
||||
|
||||
## Implementation Plan
|
||||
|
||||
### Week 1: Phase 1 - Titles
|
||||
- Add human-readable titles to all 101 tools
|
||||
- Update tool name mapping in documentation
|
||||
- Manual test in MCP inspector
|
||||
|
||||
### Week 2: Phase 2 - ToolAnnotations (High Priority)
|
||||
- Add annotations to Critical and High priority tools (~52 tools)
|
||||
- Focus on Notes, WebDAV, Calendar, Semantic, OAuth
|
||||
- Add unit tests validating annotation presence
|
||||
|
||||
### Week 3: Phase 2 - ToolAnnotations (Medium/Low Priority)
|
||||
- Complete remaining tools (~49 tools)
|
||||
- Deck, Tables, Contacts, Cookbook, News
|
||||
- Update tool listings in README
|
||||
|
||||
### Week 4: Phase 3 - Parameter Descriptions
|
||||
- Add Field() descriptions to Critical/High priority tools
|
||||
- Start with OAuth, Notes, WebDAV modules
|
||||
- Incremental completion over time
|
||||
|
||||
## Benefits
|
||||
|
||||
### For Users
|
||||
- **Clearer UI**: "Create Note" vs "nc_notes_create_note"
|
||||
- **Safety**: Warnings before destructive operations
|
||||
- **Better help**: Parameter descriptions in auto-completion
|
||||
- **Confidence**: Know which operations are safe to retry
|
||||
|
||||
### For MCP Clients
|
||||
- **Caching**: Cache results from read-only tools
|
||||
- **Safety prompts**: Warn before destructiveHint=true
|
||||
- **Retry logic**: Safely retry idempotent operations
|
||||
- **UI organization**: Group by behavior (reads vs writes vs deletes)
|
||||
- **Performance**: Optimize based on hints
|
||||
|
||||
### For Developers
|
||||
- **Self-documenting**: Behavior is explicit
|
||||
- **Consistency**: Standard patterns across codebase
|
||||
- **Testing**: Validate annotations match implementation
|
||||
- **Maintenance**: Clear expectations for new tools
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
- Immediate UX improvement with minimal effort
|
||||
- Clients can make smarter decisions
|
||||
- Self-documenting code
|
||||
- Follows MCP best practices
|
||||
|
||||
### Negative
|
||||
- Initial effort to add annotations (12-15 hours total)
|
||||
- Must maintain annotations when adding new tools
|
||||
- Risk of incorrect annotations misleading clients
|
||||
|
||||
### Neutral
|
||||
- Annotations are hints, not guarantees
|
||||
- Clients may ignore annotations
|
||||
- Backward compatible (additive change)
|
||||
|
||||
### Mitigations
|
||||
- **Incorrect annotations**: Add tests validating behavior matches hints
|
||||
- **Maintenance burden**: Add to code review checklist and tool template
|
||||
- **Documentation**: Update CLAUDE.md with annotation guidelines
|
||||
|
||||
## Examples
|
||||
|
||||
### Complete Annotated Tool (Delete)
|
||||
|
||||
```python
|
||||
from mcp.types import ToolAnnotations
|
||||
from pydantic import Field
|
||||
|
||||
@mcp.tool(
|
||||
title="Delete Note",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, # Deletes data permanently
|
||||
idempotentHint=True, # Same end state (note doesn't exist)
|
||||
openWorldHint=True # Nextcloud is external
|
||||
)
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_delete_note(
|
||||
note_id: int = Field(description="The ID of the note to delete permanently"),
|
||||
ctx: Context
|
||||
) -> DeleteNoteResponse:
|
||||
"""Delete a note permanently (requires notes:write scope)"""
|
||||
client = await get_client(ctx)
|
||||
# ... implementation ...
|
||||
```
|
||||
|
||||
### Complete Annotated Tool (Update)
|
||||
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Update Note",
|
||||
annotations=ToolAnnotations(
|
||||
idempotentHint=False, # NOT idempotent: etag changes each update
|
||||
openWorldHint=True
|
||||
)
|
||||
)
|
||||
@require_scopes("notes:write")
|
||||
@instrument_tool
|
||||
async def nc_notes_update_note(
|
||||
note_id: int = Field(description="The ID of the note to update"),
|
||||
title: str | None = Field(
|
||||
default=None,
|
||||
description="New title (omit to keep current)"
|
||||
),
|
||||
content: str | None = Field(
|
||||
default=None,
|
||||
description="New markdown content (omit to keep current)"
|
||||
),
|
||||
category: str | None = Field(
|
||||
default=None,
|
||||
description="New category/folder (omit to keep current)"
|
||||
),
|
||||
etag: str = Field(
|
||||
description="ETag from get_note (prevents concurrent modification)"
|
||||
),
|
||||
ctx: Context
|
||||
) -> UpdateNoteResponse:
|
||||
"""Update an existing note's title, content, or category.
|
||||
|
||||
The etag parameter is required to prevent overwriting concurrent changes.
|
||||
Get the current ETag by first calling nc_notes_get_note.
|
||||
If the note has been modified since you retrieved it, the update will fail.
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
# ... implementation ...
|
||||
```
|
||||
|
||||
### Complete Annotated Tool (Read-Only)
|
||||
|
||||
```python
|
||||
@mcp.tool(
|
||||
title="Search Notes",
|
||||
annotations=ToolAnnotations(
|
||||
readOnlyHint=True, # Doesn't modify data
|
||||
openWorldHint=True # Queries Nextcloud
|
||||
)
|
||||
)
|
||||
@require_scopes("notes:read")
|
||||
@instrument_tool
|
||||
async def nc_notes_search_notes(
|
||||
query: str = Field(description="Search term to match in note titles or content"),
|
||||
ctx: Context
|
||||
) -> SearchNotesResponse:
|
||||
"""Search notes by title or content, returning id, title, and category.
|
||||
|
||||
This is a read-only operation that searches across all user notes.
|
||||
Use nc_notes_get_note to retrieve the full content of matching notes.
|
||||
"""
|
||||
client = await get_client(ctx)
|
||||
# ... implementation ...
|
||||
```
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
### Unit Tests
|
||||
Add tests validating annotation presence and correctness:
|
||||
|
||||
```python
|
||||
def test_notes_tools_have_annotations():
|
||||
"""Verify all notes tools have appropriate annotations."""
|
||||
tools = get_registered_tools(mcp)
|
||||
|
||||
# Check create tool
|
||||
create_tool = tools["nc_notes_create_note"]
|
||||
assert create_tool.title == "Create Note"
|
||||
assert create_tool.annotations.idempotentHint is False
|
||||
|
||||
# Check delete tool
|
||||
delete_tool = tools["nc_notes_delete_note"]
|
||||
assert delete_tool.title == "Delete Note"
|
||||
assert delete_tool.annotations.destructiveHint is True
|
||||
assert delete_tool.annotations.idempotentHint is True
|
||||
|
||||
# Check read-only tool
|
||||
search_tool = tools["nc_notes_search_notes"]
|
||||
assert search_tool.title == "Search Notes"
|
||||
assert search_tool.annotations.readOnlyHint is True
|
||||
```
|
||||
|
||||
### Integration Tests
|
||||
- Verify existing tests pass with annotations
|
||||
- Manual testing in MCP inspector/client
|
||||
|
||||
### Documentation Updates
|
||||
- Update README tool listings with new titles
|
||||
- Add annotation guidelines to CLAUDE.md
|
||||
- Include examples in developer documentation
|
||||
|
||||
## Resolved Questions
|
||||
|
||||
1. **WebDAV write_file idempotency** (Resolved: 2025-12-11)
|
||||
- **Decision**: Mark as `idempotentHint=True`
|
||||
- **Rationale**: Uses HTTP PUT without version control. Writing same content to same path repeatedly produces identical end state, which is the definition of idempotency in HTTP semantics.
|
||||
|
||||
2. **Semantic search openWorldHint** (Resolved: 2025-12-11)
|
||||
- **Decision**: Mark as `openWorldHint=True`
|
||||
- **Rationale**: For consistency with other Nextcloud tools. While the data being searched is "indexed/internal", Nextcloud itself is external to the MCP server. The fact that data is indexed is an implementation detail, not a fundamental difference from other Nextcloud queries.
|
||||
|
||||
3. **Read-only with side effects**: Should tools that log analytics still be readOnlyHint=true?
|
||||
- **Decision**: Yes. Logging/analytics are non-visible side effects that don't change user-observable state. Read-only refers to data modifications that affect the user's content.
|
||||
|
||||
## Future Considerations
|
||||
|
||||
1. **Icons**: Visual icons for tools (requires design work, deferred to future ADR)
|
||||
2. **Parameter descriptions**: Add Pydantic `Field(description=...)` for better auto-completion (Phase 3, future work)
|
||||
|
||||
## References
|
||||
|
||||
- MCP Python SDK: `/home/chris/Software/python-sdk/`
|
||||
- ToolAnnotations spec: `src/mcp/types.py:1247`
|
||||
- FastMCP decorator: `src/mcp/server/fastmcp/server.py:444`
|
||||
- Examples: `examples/fastmcp/parameter_descriptions.py`, `examples/fastmcp/icons_demo.py`
|
||||
|
||||
## Decision Timeline
|
||||
|
||||
- **Proposed**: 2025-12-11
|
||||
- **Reviewed**: 2025-12-11 (Self-review during implementation)
|
||||
- **Accepted**: 2025-12-11
|
||||
- **Implemented**: 2025-12-11 (Phase 1 & 2 complete)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,342 +0,0 @@
|
||||
# ADR-020: Deployment Modes and Configuration Validation
|
||||
|
||||
**Status:** Accepted
|
||||
**Date:** 2025-12-20
|
||||
**Deciders:** Development Team
|
||||
**Related:** ADR-002 (Vector Sync), ADR-004 (Progressive Consent), ADR-019 (Multi-user BasicAuth)
|
||||
|
||||
## Context
|
||||
|
||||
The MCP server supports multiple deployment scenarios with different authentication methods, storage backends, and feature sets. Over time, the configuration system evolved to support ~500+ possible combinations across deployment modes, authentication patterns, and feature toggles. This complexity made it difficult to:
|
||||
|
||||
1. Understand what configuration is required for a given deployment
|
||||
2. Debug configuration errors (validation scattered across multiple files)
|
||||
3. Provide helpful error messages when configuration is invalid
|
||||
4. Maintain clear boundaries between deployment modes
|
||||
|
||||
**Problems Identified:**
|
||||
- No single source of truth for "what config is required for mode X"
|
||||
- Validation happening at 4+ different points (Settings.__post_init__, setup_oauth_config(), context helpers, starlette_lifespan)
|
||||
- Startup sequence unclear (OAuth setup before FastMCP creation, sync initialization errors)
|
||||
- Error messages generic ("X is required") without explaining which deployment mode triggered the requirement
|
||||
- Multiple overlapping decision trees (deployment mode, auth mode, features)
|
||||
|
||||
## Decision
|
||||
|
||||
We formalize five distinct deployment modes with explicit configuration requirements and implement centralized configuration validation.
|
||||
|
||||
### Deployment Modes
|
||||
|
||||
#### 1. Single-User BasicAuth
|
||||
|
||||
**Use Case:** Personal Nextcloud instance, local development
|
||||
|
||||
**Required Configuration:**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password # Or app password
|
||||
```
|
||||
|
||||
**Optional Configuration:**
|
||||
```bash
|
||||
# Vector sync (semantic search)
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
QDRANT_LOCATION=/path/to/qdrant # Or QDRANT_URL for remote
|
||||
|
||||
# Embeddings (optional - Simple provider used as fallback)
|
||||
OLLAMA_BASE_URL=http://localhost:11434
|
||||
OLLAMA_EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# Document processing
|
||||
DOCUMENT_CHUNK_SIZE=512
|
||||
DOCUMENT_CHUNK_OVERLAP=50
|
||||
```
|
||||
|
||||
**Characteristics:**
|
||||
- Single shared NextcloudClient created at startup
|
||||
- No OAuth infrastructure needed
|
||||
- No multi-user support
|
||||
- Vector sync runs as single-user background task
|
||||
- Admin UI available at /app
|
||||
|
||||
---
|
||||
|
||||
#### 2. Multi-User BasicAuth Pass-Through
|
||||
|
||||
**Use Case:** Internal deployment where users provide their own credentials, no background sync needed
|
||||
|
||||
**Required Configuration:**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://nextcloud.example.com
|
||||
ENABLE_MULTI_USER_BASIC_AUTH=true
|
||||
```
|
||||
|
||||
**Optional Configuration:**
|
||||
```bash
|
||||
# For background sync (requires app passwords from Astrolabe)
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
# ... plus Qdrant and embedding config
|
||||
```
|
||||
|
||||
**Conditional Requirements:**
|
||||
- If `ENABLE_OFFLINE_ACCESS=true`: requires `NEXTCLOUD_OIDC_CLIENT_ID`, `NEXTCLOUD_OIDC_CLIENT_SECRET`, `TOKEN_ENCRYPTION_KEY`, `TOKEN_STORAGE_DB`
|
||||
- If `VECTOR_SYNC_ENABLED=true`: requires `ENABLE_OFFLINE_ACCESS=true`
|
||||
|
||||
**Characteristics:**
|
||||
- No OAuth for client authentication (uses BasicAuth in request headers)
|
||||
- BasicAuthMiddleware extracts credentials from Authorization header
|
||||
- Client created per-request from extracted credentials
|
||||
- Optional: Background sync using app passwords (via Astrolabe API)
|
||||
- Admin UI available at /app
|
||||
|
||||
---
|
||||
|
||||
#### 3. OAuth Single-Audience (Default)
|
||||
|
||||
**Use Case:** Multi-user deployment with OAuth authentication, tokens work for both MCP and Nextcloud
|
||||
|
||||
**Required Configuration:**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://nextcloud.example.com
|
||||
# No NEXTCLOUD_USERNAME/PASSWORD (triggers OAuth mode)
|
||||
```
|
||||
|
||||
**Auto-Configured:**
|
||||
- OIDC discovery URL: `{NEXTCLOUD_HOST}/.well-known/openid-configuration`
|
||||
- Client credentials: Dynamic Client Registration (DCR) if available
|
||||
- Token storage: SQLite at `~/.oauth/clients.db`
|
||||
|
||||
**Optional Configuration:**
|
||||
```bash
|
||||
# Static client credentials (instead of DCR)
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
|
||||
|
||||
# Offline access for background sync
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
# ... plus Qdrant and embedding config
|
||||
|
||||
# Scopes
|
||||
NEXTCLOUD_OIDC_SCOPES="openid profile email notes:read notes:write ..."
|
||||
```
|
||||
|
||||
**Conditional Requirements:**
|
||||
- If `ENABLE_OFFLINE_ACCESS=true`: requires `TOKEN_ENCRYPTION_KEY`, `TOKEN_STORAGE_DB`
|
||||
- If `VECTOR_SYNC_ENABLED=true`: requires `ENABLE_OFFLINE_ACCESS=true`
|
||||
|
||||
**Characteristics:**
|
||||
- Tokens contain both `aud: ["mcp-server", "nextcloud"]`
|
||||
- Pass token through to Nextcloud APIs (no exchange)
|
||||
- Client created per-request from token in Authorization header
|
||||
- Background sync uses refresh tokens (if offline_access enabled)
|
||||
- Admin UI available at /app
|
||||
|
||||
---
|
||||
|
||||
#### 4. OAuth Token Exchange (RFC 8693)
|
||||
|
||||
**Use Case:** Multi-user deployment where MCP token is separate from Nextcloud token
|
||||
|
||||
**Required Configuration:**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://nextcloud.example.com
|
||||
ENABLE_TOKEN_EXCHANGE=true
|
||||
# No NEXTCLOUD_USERNAME/PASSWORD (triggers OAuth mode)
|
||||
```
|
||||
|
||||
**Optional Configuration:**
|
||||
- Same as OAuth Single-Audience, plus:
|
||||
```bash
|
||||
TOKEN_EXCHANGE_CACHE_TTL=300 # Cache exchanged tokens
|
||||
```
|
||||
|
||||
**Characteristics:**
|
||||
- Tokens contain only `aud: "mcp-server"`
|
||||
- MCP server exchanges token for Nextcloud token via RFC 8693
|
||||
- Exchanged tokens cached per-user
|
||||
- Client created per-request using exchanged token
|
||||
- Background sync uses refresh tokens (if offline_access enabled)
|
||||
|
||||
---
|
||||
|
||||
#### 5. Smithery Stateless
|
||||
|
||||
**Use Case:** Multi-tenant SaaS deployment via Smithery platform
|
||||
|
||||
**Required Configuration:**
|
||||
- None! Configuration comes from session URL params: `?nextcloud_url=...&username=...&app_password=...`
|
||||
|
||||
**Forbidden Configuration:**
|
||||
- Must NOT set: `NEXTCLOUD_HOST`, `NEXTCLOUD_USERNAME`, `NEXTCLOUD_PASSWORD`, `ENABLE_MULTI_USER_BASIC_AUTH`, `ENABLE_TOKEN_EXCHANGE`, `ENABLE_OFFLINE_ACCESS`, `VECTOR_SYNC_ENABLED`, `NEXTCLOUD_OIDC_CLIENT_ID`, `NEXTCLOUD_OIDC_CLIENT_SECRET`
|
||||
|
||||
**Characteristics:**
|
||||
- No persistent storage (stateless)
|
||||
- Client created per-request from session config
|
||||
- No vector sync (disabled)
|
||||
- No admin UI (no /app routes)
|
||||
- No OAuth infrastructure
|
||||
|
||||
---
|
||||
|
||||
### Configuration Validation
|
||||
|
||||
**Implementation:** `nextcloud_mcp_server/config_validators.py`
|
||||
|
||||
**Key Functions:**
|
||||
```python
|
||||
def detect_auth_mode(settings: Settings) -> AuthMode:
|
||||
"""Detect authentication mode from configuration.
|
||||
|
||||
Priority (most specific to most general):
|
||||
1. Smithery (explicit flag)
|
||||
2. Token exchange (most specific OAuth mode)
|
||||
3. Multi-user BasicAuth
|
||||
4. Single-user BasicAuth
|
||||
5. OAuth single-audience (default OAuth mode)
|
||||
"""
|
||||
|
||||
def validate_configuration(settings: Settings) -> tuple[AuthMode, list[str]]:
|
||||
"""Validate configuration for detected mode.
|
||||
|
||||
Returns:
|
||||
Tuple of (detected_mode, list_of_errors)
|
||||
Empty list means valid configuration.
|
||||
"""
|
||||
```
|
||||
|
||||
**Validation Rules:**
|
||||
- **Required variables:** Must be set and non-empty
|
||||
- **Forbidden variables:** Must NOT be set (or must be False for booleans)
|
||||
- **Conditional requirements:** If feature X is enabled, requires variables Y and Z
|
||||
|
||||
**Error Messages:**
|
||||
```
|
||||
Configuration validation failed for {mode} mode:
|
||||
- [{mode}] Missing required configuration: NEXTCLOUD_HOST
|
||||
- [{mode}] ENABLE_OFFLINE_ACCESS must be enabled when VECTOR_SYNC_ENABLED is true
|
||||
|
||||
Mode: {mode}
|
||||
Description: {mode_description}
|
||||
|
||||
Required configuration:
|
||||
- VAR1
|
||||
- VAR2
|
||||
|
||||
Optional configuration:
|
||||
- VAR3
|
||||
- VAR4
|
||||
|
||||
Conditional requirements:
|
||||
When FEATURE is enabled:
|
||||
- VAR5
|
||||
- VAR6
|
||||
```
|
||||
|
||||
**Integration:**
|
||||
- Validation runs at app startup in `get_app()` (app.py:1048-1062)
|
||||
- All errors reported before any initialization begins
|
||||
- Mode-specific error messages explain requirements
|
||||
- Validation uses the same Settings object used throughout the app
|
||||
|
||||
### Configuration Matrix
|
||||
|
||||
| Variable | Single BasicAuth | Multi BasicAuth | OAuth Single | OAuth Exchange | Smithery |
|
||||
|----------|------------------|-----------------|--------------|----------------|----------|
|
||||
| **NEXTCLOUD_HOST** | Required | Required | Required | Required | Forbidden |
|
||||
| **NEXTCLOUD_USERNAME** | Required | Forbidden | Forbidden | Forbidden | Forbidden |
|
||||
| **NEXTCLOUD_PASSWORD** | Required | Forbidden | Forbidden | Forbidden | Forbidden |
|
||||
| **ENABLE_MULTI_USER_BASIC_AUTH** | Forbidden | Required | Forbidden | Forbidden | Forbidden |
|
||||
| **ENABLE_TOKEN_EXCHANGE** | Forbidden | Forbidden | Forbidden | Required | Forbidden |
|
||||
| **ENABLE_OFFLINE_ACCESS** | Optional\* | Optional\* | Optional\* | Optional\* | Forbidden |
|
||||
| **TOKEN_ENCRYPTION_KEY** | If offline | If offline | If offline | If offline | Forbidden |
|
||||
| **TOKEN_STORAGE_DB** | If offline | If offline | If offline | If offline | Forbidden |
|
||||
| **OIDC_CLIENT_ID** | Forbidden | If offline | Optional\*\* | Optional\*\* | Forbidden |
|
||||
| **OIDC_CLIENT_SECRET** | Forbidden | If offline | Optional\*\* | Optional\*\* | Forbidden |
|
||||
| **VECTOR_SYNC_ENABLED** | Optional | Optional | Optional | Optional | Forbidden |
|
||||
| **QDRANT_URL/LOCATION** | If vector | If vector | If vector | If vector | Forbidden |
|
||||
| **OLLAMA_BASE_URL/OPENAI_API_KEY** | Optional | Optional | Optional | Optional | Forbidden |
|
||||
|
||||
\* Only enables background sync for semantic search
|
||||
\*\* Uses DCR if not provided
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
1. **Clarity:** Single function to detect mode from config
|
||||
2. **Validation:** All config validated upfront with helpful errors
|
||||
3. **Debugging:** Clear logs showing "Running in X mode with config Y"
|
||||
4. **Maintenance:** Mode-specific logic can be isolated
|
||||
5. **Documentation:** Clear mapping of mode → required config
|
||||
6. **Error Messages:** Context-aware ("X is required for Y mode")
|
||||
7. **Testing:** Each mode testable in isolation
|
||||
|
||||
### Negative
|
||||
|
||||
1. **Migration:** Existing invalid configurations will now fail at startup
|
||||
2. **Flexibility:** Less flexibility in configuration combinations
|
||||
3. **Strictness:** Some previously-working combinations may be rejected
|
||||
|
||||
### Neutral
|
||||
|
||||
1. **Backward Compatibility:** Valid configurations continue to work
|
||||
2. **Mode Detection:** Automatic based on config (no explicit mode selection)
|
||||
3. **Default Mode:** OAuth single-audience when no credentials provided
|
||||
|
||||
## Implementation Notes
|
||||
|
||||
### Embedding Provider Validation
|
||||
|
||||
Originally, validation required either `OLLAMA_BASE_URL` or `OPENAI_API_KEY` when vector sync was enabled. This was too strict because the Simple provider is always available as a fallback (ADR-015). The validation was removed to allow vector sync without explicit provider configuration.
|
||||
|
||||
### Variable Scoping Issues
|
||||
|
||||
During implementation, several Python variable scoping issues were discovered in `app.py`:
|
||||
- Local variable assignments in `starlette_lifespan()` shadowed outer scope variables
|
||||
- Fixed by using unique variable names (e.g., `nextcloud_host_for_context`, `basic_auth_storage`)
|
||||
- Removed redundant `settings = get_settings()` call (re-used outer scope)
|
||||
|
||||
### Docker Compose Configuration
|
||||
|
||||
The `mcp-oauth` service configuration was updated to remove `ENABLE_MULTI_USER_BASIC_AUTH=true` which conflicted with its intended OAuth mode. The service now runs in OAuth single-audience mode with vector sync using the Simple embedding provider as fallback.
|
||||
|
||||
## Testing
|
||||
|
||||
### Unit Tests
|
||||
|
||||
`tests/unit/test_config_validators.py` provides comprehensive coverage:
|
||||
- Mode detection with priority ordering (7 tests)
|
||||
- Single-user BasicAuth validation (8 tests)
|
||||
- Multi-user BasicAuth validation (7 tests)
|
||||
- OAuth single-audience validation (6 tests)
|
||||
- OAuth token exchange validation (3 tests)
|
||||
- Smithery validation (4 tests)
|
||||
- Mode summary generation (3 tests)
|
||||
- Edge cases (3 tests)
|
||||
|
||||
**Total: 41 tests, all passing**
|
||||
|
||||
### Integration Tests
|
||||
|
||||
Integration tests verify that:
|
||||
- Each mode starts successfully with valid configuration
|
||||
- Invalid configurations fail with clear error messages
|
||||
- Existing deployments continue to work
|
||||
|
||||
## References
|
||||
|
||||
- [ADR-002: Vector Sync Authentication](ADR-002-vector-sync-authentication.md)
|
||||
- [ADR-004: Progressive Consent](ADR-004-progressive-consent.md)
|
||||
- [ADR-015: Unified Provider Architecture](ADR-015-unified-provider-architecture.md)
|
||||
- [ADR-019: Multi-user BasicAuth Pass-Through](ADR-019-multi-user-basicauth-passthrough.md)
|
||||
- Implementation: `nextcloud_mcp_server/config_validators.py`
|
||||
- Tests: `tests/unit/test_config_validators.py`
|
||||
@@ -1,391 +0,0 @@
|
||||
# ADR-021: Configuration Consolidation and Simplification
|
||||
|
||||
**Status:** Accepted
|
||||
**Date:** 2025-12-21
|
||||
**Deciders:** Development Team
|
||||
**Related:** ADR-020 (Deployment Modes), ADR-002 (Vector Sync), ADR-004 (Progressive Consent)
|
||||
|
||||
## Context
|
||||
|
||||
The configuration system has grown complex with overlapping concerns that make it difficult for users to switch between deployment modes and understand configuration dependencies.
|
||||
|
||||
### Problems Identified
|
||||
|
||||
1. **Confusing variable names don't reflect purpose**:
|
||||
- `ENABLE_OFFLINE_ACCESS` - Actually controls refresh token storage for background operations, not general "offline" capabilities
|
||||
- `VECTOR_SYNC_ENABLED` - Controls semantic search background indexing (implementation detail, not user-facing feature name)
|
||||
- Users struggle to understand what these variables actually control
|
||||
|
||||
2. **Redundant configuration requirements**:
|
||||
- Multi-user semantic search requires setting BOTH `ENABLE_OFFLINE_ACCESS=true` AND `VECTOR_SYNC_ENABLED=true`
|
||||
- The dependency is one-way (semantic search needs background ops, but background ops don't need semantic search)
|
||||
- Users must understand internal implementation details to configure a user-facing feature
|
||||
|
||||
3. **Implicit mode detection creates ambiguity**:
|
||||
- Five deployment modes detected via priority-based logic
|
||||
- Users can't easily predict which mode will activate
|
||||
- Configuration errors don't clearly indicate which mode triggered the requirement
|
||||
|
||||
4. **OIDC_CLIENT_ID vs NEXTCLOUD_OIDC_CLIENT_ID confusion**:
|
||||
- Investigation revealed these are NOT actually overlapping (`OIDC_CLIENT_ID` is test-only)
|
||||
- However, their similar names create confusion
|
||||
|
||||
### Current Configuration Complexity
|
||||
|
||||
**Example: Multi-user OAuth with semantic search**:
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_OFFLINE_ACCESS=true # Why is this needed?
|
||||
VECTOR_SYNC_ENABLED=true # And this separately?
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
```
|
||||
|
||||
Users must understand:
|
||||
- Semantic search requires background token storage (ENABLE_OFFLINE_ACCESS)
|
||||
- Background token storage requires encryption keys
|
||||
- The relationship between ENABLE_OFFLINE_ACCESS and VECTOR_SYNC_ENABLED
|
||||
- Which deployment mode these settings will activate
|
||||
|
||||
## Decision
|
||||
|
||||
We consolidate overlapping functionality and add explicit mode selection while maintaining 100% backward compatibility.
|
||||
|
||||
### 1. Automatic Dependency Resolution
|
||||
|
||||
**Make ENABLE_SEMANTIC_SEARCH the primary control** that automatically enables required dependencies:
|
||||
|
||||
**New behavior**:
|
||||
```python
|
||||
@property
|
||||
def enable_background_operations(self) -> bool:
|
||||
"""Background operations - auto-enabled by semantic search in multi-user modes."""
|
||||
# Check new names first
|
||||
explicit = os.getenv("ENABLE_BACKGROUND_OPERATIONS", "").lower() == "true"
|
||||
# Fall back to old name with deprecation warning
|
||||
legacy = os.getenv("ENABLE_OFFLINE_ACCESS", "").lower() == "true"
|
||||
# Auto-enable if semantic search needs it
|
||||
auto_enabled = self.enable_semantic_search and self.is_multi_user_mode()
|
||||
|
||||
return explicit or legacy or auto_enabled
|
||||
|
||||
@property
|
||||
def enable_semantic_search(self) -> bool:
|
||||
"""Semantic search - renamed from VECTOR_SYNC_ENABLED."""
|
||||
new_value = os.getenv("ENABLE_SEMANTIC_SEARCH", "").lower() == "true"
|
||||
old_value = os.getenv("VECTOR_SYNC_ENABLED", "").lower() == "true"
|
||||
return new_value or old_value
|
||||
```
|
||||
|
||||
**Result**: Users set `ENABLE_SEMANTIC_SEARCH=true` and the system automatically enables background token storage when needed.
|
||||
|
||||
### 2. Explicit Mode Selection (Optional)
|
||||
|
||||
Add `MCP_DEPLOYMENT_MODE` environment variable to remove detection ambiguity:
|
||||
|
||||
```bash
|
||||
# Optional: Explicitly declare deployment mode
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# Valid values: single_user_basic, multi_user_basic,
|
||||
# oauth_single_audience, oauth_token_exchange, smithery
|
||||
```
|
||||
|
||||
**Detection logic**:
|
||||
1. If `MCP_DEPLOYMENT_MODE` is set → validate and use it
|
||||
2. Otherwise → use priority-based auto-detection (existing behavior)
|
||||
3. Validate explicit mode doesn't conflict with detected mode
|
||||
|
||||
### 3. Simplified User Experience
|
||||
|
||||
**Before**:
|
||||
```bash
|
||||
# Multi-user OAuth with semantic search
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_OFFLINE_ACCESS=true # Confusing
|
||||
VECTOR_SYNC_ENABLED=true # Why both?
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
```
|
||||
|
||||
**After**:
|
||||
```bash
|
||||
# Multi-user OAuth with semantic search
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience # Explicit (optional)
|
||||
ENABLE_SEMANTIC_SEARCH=true # Auto-enables background ops
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- 2 fewer variables to understand/set
|
||||
- Clear intent ("I want semantic search")
|
||||
- Explicit mode declaration (optional)
|
||||
- All existing configs continue working
|
||||
|
||||
### 4. Variable Naming Strategy
|
||||
|
||||
**Deprecated (but still functional)**:
|
||||
- `ENABLE_OFFLINE_ACCESS` → Renamed to `ENABLE_BACKGROUND_OPERATIONS`
|
||||
- `VECTOR_SYNC_ENABLED` → Renamed to `ENABLE_SEMANTIC_SEARCH`
|
||||
|
||||
**No change needed**:
|
||||
- `VECTOR_SYNC_SCAN_INTERVAL` - Implementation tuning parameter (keep as-is)
|
||||
- `VECTOR_SYNC_PROCESSOR_WORKERS` - Implementation tuning parameter (keep as-is)
|
||||
- `VECTOR_SYNC_QUEUE_MAX_SIZE` - Implementation tuning parameter (keep as-is)
|
||||
|
||||
**Rationale**: Only rename user-facing feature flags, not internal tuning parameters.
|
||||
|
||||
### 5. Backward Compatibility
|
||||
|
||||
**Support both old and new names for minimum 2 major versions**:
|
||||
|
||||
```python
|
||||
@property
|
||||
def enable_semantic_search(self) -> bool:
|
||||
new_value = os.getenv("ENABLE_SEMANTIC_SEARCH", "").lower() == "true"
|
||||
old_value = os.getenv("VECTOR_SYNC_ENABLED", "").lower() == "true"
|
||||
|
||||
if new_value and old_value:
|
||||
logger.warning(
|
||||
"Both ENABLE_SEMANTIC_SEARCH and VECTOR_SYNC_ENABLED are set. "
|
||||
"Using ENABLE_SEMANTIC_SEARCH. VECTOR_SYNC_ENABLED is deprecated."
|
||||
)
|
||||
|
||||
if old_value and not new_value:
|
||||
logger.warning(
|
||||
"VECTOR_SYNC_ENABLED is deprecated. Please use ENABLE_SEMANTIC_SEARCH instead."
|
||||
)
|
||||
|
||||
return new_value or old_value
|
||||
```
|
||||
|
||||
**Deprecation timeline**:
|
||||
- v0.6.0: Add new variables, deprecate old ones (both work with warnings)
|
||||
- v1.0.0: Remove old variables (breaking change, well-announced)
|
||||
- Minimum 2 major versions of support (12+ months)
|
||||
|
||||
## Consequences
|
||||
|
||||
### Positive
|
||||
|
||||
1. **Reduced cognitive load**: Users set `ENABLE_SEMANTIC_SEARCH=true` instead of understanding internal dependencies
|
||||
2. **Clearer intent**: Variable names reflect user-facing features, not implementation details
|
||||
3. **Explicit mode control**: `MCP_DEPLOYMENT_MODE` removes detection ambiguity
|
||||
4. **Better onboarding**: New users see simpler configuration in env.sample
|
||||
5. **Improved error messages**: Validation can suggest "set MCP_DEPLOYMENT_MODE=X" instead of relying on implicit detection
|
||||
6. **No breaking changes**: All existing configurations continue working
|
||||
|
||||
### Negative
|
||||
|
||||
1. **Transition period complexity**: Both old and new names supported for 2+ versions
|
||||
2. **Documentation burden**: All docs must be updated to show new approach
|
||||
3. **Test coverage expansion**: Must test both old and new variable names in all modes
|
||||
4. **Migration effort**: Existing deployments should eventually migrate (optional but recommended)
|
||||
|
||||
### Neutral
|
||||
|
||||
1. **Same functionality**: No new features, just better organization
|
||||
2. **Same validation**: Underlying requirements unchanged (e.g., semantic search still needs Qdrant)
|
||||
3. **Same performance**: No runtime performance impact
|
||||
|
||||
## Implementation
|
||||
|
||||
### Phase 1: Configuration Consolidation (v0.6.0)
|
||||
|
||||
**Files to modify**:
|
||||
- `nextcloud_mcp_server/config.py` - Add property-based deprecation with auto-enablement
|
||||
- `nextcloud_mcp_server/config_validators.py` - Simplify validation (semantic search no longer requires explicit background operations setting)
|
||||
- `nextcloud_mcp_server/app.py` - Add informative logging for auto-enablement
|
||||
- `tests/unit/test_config_validators.py` - Add auto-enablement tests
|
||||
- `docs/configuration-migration-v2.md` - Create migration guide
|
||||
|
||||
**Key changes**:
|
||||
1. `enable_background_operations` property auto-enables when `enable_semantic_search=true` in multi-user modes
|
||||
2. `enable_semantic_search` property accepts both `ENABLE_SEMANTIC_SEARCH` and `VECTOR_SYNC_ENABLED`
|
||||
3. Smart logging when auto-enablement occurs or deprecated variables used
|
||||
4. Validation simplified to remove redundant requirements
|
||||
|
||||
### Phase 2: Explicit Mode Selection (v0.6.0)
|
||||
|
||||
**Files to modify**:
|
||||
- `nextcloud_mcp_server/config.py` - Add `deployment_mode` field
|
||||
- `nextcloud_mcp_server/config_validators.py` - Check explicit mode first, fall back to auto-detection
|
||||
- `tests/unit/test_config_validators.py` - Test mode override and conflict detection
|
||||
- `docs/configuration.md` - Document mode selection
|
||||
|
||||
**Key changes**:
|
||||
1. Add `MCP_DEPLOYMENT_MODE` environment variable (optional)
|
||||
2. Mode detection checks explicit mode first, then auto-detects
|
||||
3. Validate explicit mode doesn't conflict with detected mode
|
||||
4. Better error messages referencing explicit mode setting
|
||||
|
||||
### Phase 3: env.sample Reorganization (v0.6.0)
|
||||
|
||||
**Files to create/modify**:
|
||||
- `env.sample` - Reorganize by deployment mode
|
||||
- `env.sample.single-user` - Simplest config template
|
||||
- `env.sample.oauth-multi-user` - Multi-user template showing consolidation
|
||||
- `env.sample.oauth-advanced` - Token exchange mode template
|
||||
- `README.md` - Update Quick Start to reference templates
|
||||
|
||||
**Key changes**:
|
||||
1. Group related settings by deployment mode
|
||||
2. Show simplified configuration (only essential variables)
|
||||
3. Document automatic dependencies inline
|
||||
4. Provide mode-specific quick-start templates
|
||||
|
||||
### Phase 4: Documentation Updates (v0.7.0)
|
||||
|
||||
**Files to modify**:
|
||||
- `docs/configuration.md` - Lead with consolidated approach
|
||||
- `docs/authentication.md` - Update mode guidance with `MCP_DEPLOYMENT_MODE`
|
||||
- `docs/troubleshooting.md` - Add consolidation troubleshooting section
|
||||
- `docs/configuration-migration-v2.md` - Expand with comprehensive examples
|
||||
- `docs/ADR-020-deployment-modes-and-configuration-validation.md` - Update configuration matrix
|
||||
- All other ADRs - Update variable references
|
||||
|
||||
**Key changes**:
|
||||
1. Update all examples to use new variable names
|
||||
2. Add before/after migration examples
|
||||
3. Document automatic dependency resolution
|
||||
4. Add mode selection decision tree diagram
|
||||
|
||||
## Validation Strategy
|
||||
|
||||
### Test Coverage Requirements
|
||||
|
||||
**Backward compatibility tests**:
|
||||
- Old variable names still work (ENABLE_OFFLINE_ACCESS, VECTOR_SYNC_ENABLED)
|
||||
- New variable names work (ENABLE_BACKGROUND_OPERATIONS, ENABLE_SEMANTIC_SEARCH)
|
||||
- Setting both old and new triggers deprecation warning but works correctly
|
||||
- All 41 existing config validation tests pass
|
||||
|
||||
**Auto-enablement tests**:
|
||||
- `ENABLE_SEMANTIC_SEARCH=true` in OAuth mode → `enable_background_operations=true`
|
||||
- `ENABLE_SEMANTIC_SEARCH=true` in single-user mode → `enable_background_operations=false` (not needed)
|
||||
- `ENABLE_SEMANTIC_SEARCH=false` → `enable_background_operations=false` (unless explicitly set)
|
||||
|
||||
**Mode selection tests**:
|
||||
- `MCP_DEPLOYMENT_MODE=oauth_single_audience` → mode correctly detected
|
||||
- `MCP_DEPLOYMENT_MODE` conflicts with detected mode → validation error
|
||||
- No `MCP_DEPLOYMENT_MODE` → auto-detection works as before
|
||||
|
||||
## Success Metrics
|
||||
|
||||
**Immediate** (v0.6.0 release):
|
||||
- Zero breaking changes in existing deployments
|
||||
- All 41 config validation tests pass
|
||||
- New users report clearer configuration process
|
||||
|
||||
**Medium-term** (6 months after v0.6.0):
|
||||
- 80% of new deployments use new variable names
|
||||
- Mode selection errors decrease by 50%
|
||||
- Support requests about configuration decrease
|
||||
|
||||
**Long-term** (12+ months):
|
||||
- 90% of deployments migrated to new names
|
||||
- Old variable names can be safely removed in v1.0.0
|
||||
- Configuration-related issues in issue tracker decrease
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### Alternative 1: Just Rename Variables
|
||||
|
||||
**Rejected**: User feedback: "There's no reason to just rename variables without consolidating functionality"
|
||||
|
||||
This would make names clearer but wouldn't reduce the number of variables users need to set. The real problem is requiring users to set both ENABLE_OFFLINE_ACCESS and VECTOR_SYNC_ENABLED when they just want semantic search.
|
||||
|
||||
### Alternative 2: Remove ENABLE_OFFLINE_ACCESS Entirely
|
||||
|
||||
**Rejected**: Advanced users need background operations without semantic search
|
||||
|
||||
Some deployments might want background token storage for future features (background Deck sync, background Calendar sync, etc.) without enabling semantic search. Keeping ENABLE_BACKGROUND_OPERATIONS (renamed) allows this.
|
||||
|
||||
### Alternative 3: Always Auto-Enable Background Operations
|
||||
|
||||
**Rejected**: Single-user mode doesn't need background token storage
|
||||
|
||||
Auto-enablement is only needed in multi-user modes. Single-user mode uses a shared client with BasicAuth, so background token storage is unnecessary. Always enabling it would waste resources and create confusing log messages.
|
||||
|
||||
### Alternative 4: Require All New Names Immediately
|
||||
|
||||
**Rejected**: Breaking change would affect all existing deployments
|
||||
|
||||
Forcing migration to new variable names in v0.6.0 would break every existing deployment. Supporting both old and new names with deprecation warnings provides a smooth migration path.
|
||||
|
||||
## References
|
||||
|
||||
- [ADR-020: Deployment Modes and Configuration Validation](ADR-020-deployment-modes-and-configuration-validation.md)
|
||||
- [ADR-002: Vector Sync Authentication](ADR-002-vector-sync-authentication.md)
|
||||
- [ADR-004: Progressive Consent](ADR-004-mcp-application-oauth.md)
|
||||
- [Issue: Configuration complexity for multi-user semantic search](https://github.com/cbcoutinho/nextcloud-mcp-server/issues/XXX)
|
||||
|
||||
## Migration Examples
|
||||
|
||||
### Example 1: Single-User BasicAuth with Semantic Search
|
||||
|
||||
**Before**:
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
QDRANT_LOCATION=:memory:
|
||||
```
|
||||
|
||||
**After** (optional migration):
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password
|
||||
ENABLE_SEMANTIC_SEARCH=true # Renamed
|
||||
QDRANT_LOCATION=:memory:
|
||||
# Note: Background operations NOT auto-enabled (not needed in single-user mode)
|
||||
```
|
||||
|
||||
### Example 2: Multi-User OAuth with Semantic Search
|
||||
|
||||
**Before**:
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
```
|
||||
|
||||
**After** (simplified):
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience # Explicit (optional)
|
||||
ENABLE_SEMANTIC_SEARCH=true # Auto-enables background operations
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
# Note: ENABLE_OFFLINE_ACCESS no longer needed (auto-enabled)
|
||||
```
|
||||
|
||||
### Example 3: Multi-User OAuth WITHOUT Semantic Search
|
||||
|
||||
**Before**:
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_OFFLINE_ACCESS=true # For future background features
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
```
|
||||
|
||||
**After** (optional migration):
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
ENABLE_BACKGROUND_OPERATIONS=true # Renamed for clarity
|
||||
TOKEN_ENCRYPTION_KEY=<key>
|
||||
TOKEN_STORAGE_DB=/path/to/tokens.db
|
||||
```
|
||||
@@ -1,104 +0,0 @@
|
||||
# MCP 1.23.x DNS Rebinding Protection Fix
|
||||
|
||||
## Problem
|
||||
|
||||
MCP Python SDK 1.23.0 introduced **automatic DNS rebinding protection** that breaks containerized deployments (Kubernetes, Docker) when the protection is unintentionally auto-enabled.
|
||||
|
||||
### Root Cause
|
||||
|
||||
From `mcp/server/fastmcp/server.py:177-183` in the Python SDK:
|
||||
|
||||
```python
|
||||
# Auto-enable DNS rebinding protection for localhost (IPv4 and IPv6)
|
||||
if transport_security is None and host in ("127.0.0.1", "localhost", "::1"):
|
||||
transport_security = TransportSecuritySettings(
|
||||
enable_dns_rebinding_protection=True,
|
||||
allowed_hosts=["127.0.0.1:*", "localhost:*", "[::1]:*"],
|
||||
allowed_origins=["http://127.0.0.1:*", "http://localhost:*", "http://[::1]:*"],
|
||||
)
|
||||
```
|
||||
|
||||
### What Was Happening
|
||||
|
||||
1. **FastMCP initialization** in `app.py` didn't pass `host` or `transport_security` parameters
|
||||
2. **Defaults applied**: `host="127.0.0.1"`, `transport_security=None`
|
||||
3. **Auto-enablement triggered**: Condition `transport_security is None and host == "127.0.0.1"` was TRUE
|
||||
4. **Protection activated** with `allowed_hosts=["127.0.0.1:*", "localhost:*", "[::1]:*"]`
|
||||
5. **Kubernetes requests rejected**: `Host: nextcloud-mcp-server.default.svc.cluster.local:8000` didn't match allowed hosts
|
||||
|
||||
### Why `--host 0.0.0.0` Didn't Help
|
||||
|
||||
The `--host` CLI flag (used in Dockerfile/docker-compose) controls **uvicorn's bind address**, NOT the **FastMCP `host` parameter**. These are separate concerns:
|
||||
|
||||
- **Uvicorn bind address** (`--host 0.0.0.0`): Where the HTTP server listens
|
||||
- **FastMCP host parameter** (defaulted to `"127.0.0.1"`): Used for auto-enablement logic
|
||||
|
||||
## Solution
|
||||
|
||||
Explicitly disable DNS rebinding protection by passing `transport_security=TransportSecuritySettings(enable_dns_rebinding_protection=False)` to all FastMCP instances.
|
||||
|
||||
### Changes Made
|
||||
|
||||
Modified `nextcloud_mcp_server/app.py`:
|
||||
|
||||
1. **Import** `TransportSecuritySettings` from `mcp.server.transport_security`
|
||||
2. **Updated all three FastMCP initializations**:
|
||||
- OAuth mode (line 1015)
|
||||
- Smithery stateless mode (line 1030)
|
||||
- BasicAuth mode (line 1040)
|
||||
|
||||
Each now includes:
|
||||
```python
|
||||
transport_security=TransportSecuritySettings(enable_dns_rebinding_protection=False)
|
||||
```
|
||||
|
||||
## Impact
|
||||
|
||||
### ✅ What This Fixes
|
||||
|
||||
- **Kubernetes deployments**: Requests with k8s service DNS names now work
|
||||
- **Docker deployments**: Port-mapped requests (localhost:8000 → container) now work
|
||||
- **Reverse proxy deployments**: Proxied requests with various Host headers now work
|
||||
- **Ingress controllers**: Requests via ingress hostnames now work
|
||||
|
||||
### 🔒 Security Considerations
|
||||
|
||||
DNS rebinding protection defends against attacks where:
|
||||
1. Attacker controls a DNS domain (e.g., `evil.com`)
|
||||
2. DNS initially resolves to attacker's IP
|
||||
3. After victim's browser caches the origin, DNS changes to victim's localhost
|
||||
4. Attacker's page can now make requests to victim's localhost services
|
||||
|
||||
**Why it's safe to disable for this deployment:**
|
||||
|
||||
1. **OAuth authentication required** in production deployments (ADR-002, ADR-004)
|
||||
2. **Network-level isolation** in containerized environments (k8s network policies, Docker networks)
|
||||
3. **MCP is server-to-server**, not exposed to browsers (no CORS concerns)
|
||||
4. **Host header validation inappropriate** for multi-tenant k8s environments
|
||||
|
||||
If DNS rebinding protection is needed for specific deployments, it can be re-enabled with a custom allowed hosts list:
|
||||
|
||||
```python
|
||||
transport_security=TransportSecuritySettings(
|
||||
enable_dns_rebinding_protection=True,
|
||||
allowed_hosts=[
|
||||
"nextcloud-mcp-server.default.svc.cluster.local:*",
|
||||
"mcp.example.com:*",
|
||||
# Add all your expected Host header values
|
||||
]
|
||||
)
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
- ✅ Ruff linting passes
|
||||
- ✅ Type checking passes (pre-existing warnings unrelated)
|
||||
- ✅ Module imports successfully
|
||||
- ✅ Compatible with MCP 1.23.x
|
||||
|
||||
## References
|
||||
|
||||
- [MCP Python SDK 1.23.0 Release](https://github.com/modelcontextprotocol/python-sdk/releases/tag/v1.23.0)
|
||||
- Commit: `d3a1841` - "Auto-enable DNS rebinding protection for localhost servers"
|
||||
- Issue #373 (original report of k8s breakage)
|
||||
- PR #382 (MCP 1.23.x upgrade)
|
||||
@@ -140,93 +140,6 @@ Basic Authentication uses username and password credentials directly.
|
||||
- [Configuration](configuration.md#basic-authentication-legacy) - BasicAuth environment variables
|
||||
- [Running the Server](running.md#basicauth-mode-legacy) - BasicAuth examples
|
||||
|
||||
## Hybrid Authentication (Multi-User BasicAuth + OAuth)
|
||||
|
||||
When running in multi-user BasicAuth mode with `ENABLE_OFFLINE_ACCESS=true`, the server operates in **hybrid authentication mode**. This provides the simplicity of BasicAuth for normal operations with the security of OAuth for administrative functions.
|
||||
|
||||
### Authentication Domains
|
||||
|
||||
**MCP Operations** (Tools, Resources):
|
||||
- **Auth Method**: BasicAuth (HTTP Basic username/password)
|
||||
- **Characteristics**:
|
||||
- Stateless - no token storage
|
||||
- Simple configuration
|
||||
- Direct credential validation against Nextcloud
|
||||
- Credentials passed per-request in Authorization header
|
||||
- **Used For**: MCP tool calls from Claude, MCP client operations
|
||||
|
||||
**Management APIs** (Webhooks, Admin UI):
|
||||
- **Auth Method**: OAuth bearer tokens
|
||||
- **Characteristics**:
|
||||
- Per-user authorization via OAuth consent flow
|
||||
- Refresh tokens stored for background operations
|
||||
- Token validation via UnifiedTokenVerifier
|
||||
- Explicit user consent required
|
||||
- **Used For**: Astrolabe admin UI, webhook management, vector sync operations
|
||||
|
||||
### Configuration
|
||||
|
||||
```env
|
||||
# Enable multi-user BasicAuth
|
||||
ENABLE_MULTI_USER_BASIC_AUTH=true
|
||||
|
||||
# Enable hybrid mode (OAuth provisioning for management APIs)
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
|
||||
# Enable background sync (required for hybrid mode currently)
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
|
||||
# Encryption key for refresh token storage
|
||||
TOKEN_ENCRYPTION_KEY=<base64-encoded-key>
|
||||
|
||||
# Nextcloud connection
|
||||
NEXTCLOUD_HOST=https://cloud.example.com
|
||||
|
||||
# OAuth credentials (optional - uses DCR if not set)
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=<client-id>
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=<client-secret>
|
||||
```
|
||||
|
||||
### OAuth Provisioning Flow
|
||||
|
||||
1. Admin opens Astrolabe admin settings in Nextcloud
|
||||
2. Clicks "Authorize" to enable webhook management
|
||||
3. Redirected to `/oauth/authorize-nextcloud` on MCP server
|
||||
4. MCP server redirects to Nextcloud OAuth consent page
|
||||
5. Admin grants OAuth consent (scopes: `openid`, `profile`, `offline_access`)
|
||||
6. Redirected back to `/oauth/callback` on MCP server
|
||||
7. MCP server stores refresh token (encrypted)
|
||||
8. Admin can now manage webhooks from Astrolabe UI
|
||||
|
||||
### Benefits
|
||||
|
||||
- **Simple MCP client setup**: Use BasicAuth (no OAuth complexity for end users)
|
||||
- **Secure background operations**: Webhooks use per-user OAuth tokens (no shared credentials)
|
||||
- **Explicit authorization**: Admins must explicitly grant OAuth consent for webhook operations
|
||||
- **Per-user isolation**: Each admin's webhook operations use their own refresh token
|
||||
|
||||
### Trade-offs
|
||||
|
||||
- **Two auth systems**: More complex server configuration than pure BasicAuth or OAuth
|
||||
- **OAuth setup required**: Admins must complete OAuth flow before managing webhooks
|
||||
- **Token storage**: Requires database and encryption key for refresh tokens
|
||||
|
||||
### Comparison
|
||||
|
||||
| Feature | Pure BasicAuth | Hybrid Mode | Pure OAuth |
|
||||
|---------|---------------|-------------|------------|
|
||||
| MCP Operations | BasicAuth | BasicAuth | OAuth Bearer Token |
|
||||
| Management API | N/A | OAuth Bearer Token | OAuth Bearer Token |
|
||||
| Webhook Operations | N/A | OAuth Refresh Token | OAuth Refresh Token |
|
||||
| MCP Client Setup | Simple | Simple | Complex (PKCE flow) |
|
||||
| Admin UI Auth | N/A | OAuth Consent | OAuth Login |
|
||||
| Token Storage | None | Refresh tokens only | All tokens |
|
||||
| Deployment Complexity | Low | Medium | High |
|
||||
|
||||
### See Also
|
||||
- [OAuth Architecture](oauth-architecture.md) - Progressive Consent (Flow 2) details
|
||||
- [Configuration](configuration.md#enable_offline_access) - Hybrid mode configuration
|
||||
|
||||
## Mode Detection
|
||||
|
||||
The server automatically detects the authentication mode:
|
||||
|
||||
@@ -1,564 +0,0 @@
|
||||
# Configuration Migration Guide v2
|
||||
|
||||
**Version:** v0.58.0
|
||||
**Status:** Active
|
||||
**Related ADR:** [ADR-021: Configuration Consolidation and Simplification](ADR-021-configuration-consolidation.md)
|
||||
|
||||
## Overview
|
||||
|
||||
This guide helps you migrate from the old configuration variables to the new consolidated approach introduced in v0.58.0.
|
||||
|
||||
**Key Changes:**
|
||||
- `VECTOR_SYNC_ENABLED` → `ENABLE_SEMANTIC_SEARCH`
|
||||
- `ENABLE_OFFLINE_ACCESS` → `ENABLE_BACKGROUND_OPERATIONS`
|
||||
- New: `MCP_DEPLOYMENT_MODE` for explicit mode selection
|
||||
- Automatic dependency resolution: semantic search auto-enables background operations
|
||||
|
||||
**Backward Compatibility:**
|
||||
- Old variable names still work in v0.58.0+
|
||||
- Deprecation warnings logged when old names used
|
||||
- Old names will be removed in v1.0.0
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference: Variable Name Changes
|
||||
|
||||
| Old Name | New Name | Status |
|
||||
|----------|----------|--------|
|
||||
| `VECTOR_SYNC_ENABLED` | `ENABLE_SEMANTIC_SEARCH` | Deprecated |
|
||||
| `ENABLE_OFFLINE_ACCESS` | `ENABLE_BACKGROUND_OPERATIONS` | Deprecated |
|
||||
| N/A (auto-detected) | `MCP_DEPLOYMENT_MODE` | New (optional) |
|
||||
|
||||
**Tuning parameters unchanged:**
|
||||
- `VECTOR_SYNC_SCAN_INTERVAL` - Keep as-is
|
||||
- `VECTOR_SYNC_PROCESSOR_WORKERS` - Keep as-is
|
||||
- `VECTOR_SYNC_QUEUE_MAX_SIZE` - Keep as-is
|
||||
|
||||
---
|
||||
|
||||
## Migration Scenarios
|
||||
|
||||
### Scenario 1: Single-User BasicAuth with Semantic Search
|
||||
|
||||
**Before (v0.57.x):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
QDRANT_LOCATION=:memory:
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
```
|
||||
|
||||
**After (v0.58.0+):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password
|
||||
|
||||
# Optional: Explicit mode declaration (recommended)
|
||||
MCP_DEPLOYMENT_MODE=single_user_basic
|
||||
|
||||
# Updated variable name
|
||||
ENABLE_SEMANTIC_SEARCH=true # Previously VECTOR_SYNC_ENABLED
|
||||
|
||||
QDRANT_LOCATION=:memory:
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
```
|
||||
|
||||
**What Changed:**
|
||||
- ✅ Renamed `VECTOR_SYNC_ENABLED` to `ENABLE_SEMANTIC_SEARCH`
|
||||
- ✅ Added optional `MCP_DEPLOYMENT_MODE` for clarity
|
||||
- ✅ Background operations NOT auto-enabled (not needed in single-user mode)
|
||||
|
||||
**Migration Steps:**
|
||||
1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
|
||||
2. Optionally add `MCP_DEPLOYMENT_MODE=single_user_basic`
|
||||
3. Restart server
|
||||
4. Verify deprecation warnings are gone
|
||||
|
||||
---
|
||||
|
||||
### Scenario 2: Multi-User OAuth with Semantic Search
|
||||
|
||||
**Before (v0.57.x):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# Both variables required - confusing!
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=secret
|
||||
```
|
||||
|
||||
**After (v0.58.0+ - Simplified):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# Optional: Explicit mode declaration
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# One variable does it all!
|
||||
ENABLE_SEMANTIC_SEARCH=true # Automatically enables background operations
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=secret
|
||||
|
||||
# Note: ENABLE_OFFLINE_ACCESS no longer needed!
|
||||
# Background operations are auto-enabled by ENABLE_SEMANTIC_SEARCH
|
||||
```
|
||||
|
||||
**What Changed:**
|
||||
- ✅ Removed need for explicit `ENABLE_OFFLINE_ACCESS`
|
||||
- ✅ `ENABLE_SEMANTIC_SEARCH` automatically enables background operations in multi-user modes
|
||||
- ✅ Renamed `VECTOR_SYNC_ENABLED` to `ENABLE_SEMANTIC_SEARCH`
|
||||
- ✅ Added optional explicit mode declaration
|
||||
|
||||
**Migration Steps:**
|
||||
1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
|
||||
2. Remove `ENABLE_OFFLINE_ACCESS=true` (auto-enabled)
|
||||
3. Optionally add `MCP_DEPLOYMENT_MODE=oauth_single_audience`
|
||||
4. Restart server
|
||||
5. Check logs for confirmation: "Automatically enabled background operations for semantic search"
|
||||
|
||||
---
|
||||
|
||||
### Scenario 3: Multi-User OAuth WITHOUT Semantic Search
|
||||
|
||||
**Before (v0.57.x):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# Enable background operations for future features
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=secret
|
||||
```
|
||||
|
||||
**After (v0.58.0+):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# Optional: Explicit mode declaration
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# Renamed for clarity
|
||||
ENABLE_BACKGROUND_OPERATIONS=true # Previously ENABLE_OFFLINE_ACCESS
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=secret
|
||||
```
|
||||
|
||||
**What Changed:**
|
||||
- ✅ Renamed `ENABLE_OFFLINE_ACCESS` to `ENABLE_BACKGROUND_OPERATIONS`
|
||||
- ✅ Added optional explicit mode declaration
|
||||
|
||||
**Migration Steps:**
|
||||
1. Replace `ENABLE_OFFLINE_ACCESS=true` with `ENABLE_BACKGROUND_OPERATIONS=true`
|
||||
2. Optionally add `MCP_DEPLOYMENT_MODE=oauth_single_audience`
|
||||
3. Restart server
|
||||
|
||||
---
|
||||
|
||||
### Scenario 4: Multi-User BasicAuth with Semantic Search
|
||||
|
||||
**Before (v0.57.x):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_MULTI_USER_BASIC_AUTH=true
|
||||
|
||||
# Both required - redundant
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=secret
|
||||
```
|
||||
|
||||
**After (v0.58.0+ - Simplified):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_MULTI_USER_BASIC_AUTH=true
|
||||
|
||||
# Optional: Explicit mode declaration
|
||||
MCP_DEPLOYMENT_MODE=multi_user_basic
|
||||
|
||||
# One variable handles both!
|
||||
ENABLE_SEMANTIC_SEARCH=true # Auto-enables background operations
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=mcp-server
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=secret
|
||||
|
||||
# Note: ENABLE_OFFLINE_ACCESS no longer needed!
|
||||
```
|
||||
|
||||
**What Changed:**
|
||||
- ✅ Semantic search auto-enables background operations
|
||||
- ✅ Removed need for explicit `ENABLE_OFFLINE_ACCESS`
|
||||
- ✅ Clearer variable naming
|
||||
|
||||
**Migration Steps:**
|
||||
1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
|
||||
2. Remove `ENABLE_OFFLINE_ACCESS=true` (auto-enabled)
|
||||
3. Optionally add `MCP_DEPLOYMENT_MODE=multi_user_basic`
|
||||
4. Restart server
|
||||
|
||||
---
|
||||
|
||||
### Scenario 5: Token Exchange Mode with Semantic Search
|
||||
|
||||
**Before (v0.57.x):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_TOKEN_EXCHANGE=true
|
||||
|
||||
# Both required
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
TOKEN_EXCHANGE_CACHE_TTL=300
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
```
|
||||
|
||||
**After (v0.58.0+ - Simplified):**
|
||||
```bash
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
ENABLE_TOKEN_EXCHANGE=true
|
||||
|
||||
# Optional: Explicit mode declaration
|
||||
MCP_DEPLOYMENT_MODE=oauth_token_exchange
|
||||
|
||||
# One variable!
|
||||
ENABLE_SEMANTIC_SEARCH=true # Auto-enables background operations
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
TOKEN_EXCHANGE_CACHE_TTL=300
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
```
|
||||
|
||||
**What Changed:**
|
||||
- ✅ Semantic search auto-enables background operations
|
||||
- ✅ Explicit mode declaration available
|
||||
|
||||
**Migration Steps:**
|
||||
1. Replace `VECTOR_SYNC_ENABLED=true` with `ENABLE_SEMANTIC_SEARCH=true`
|
||||
2. Remove `ENABLE_OFFLINE_ACCESS=true` (auto-enabled)
|
||||
3. Optionally add `MCP_DEPLOYMENT_MODE=oauth_token_exchange`
|
||||
4. Restart server
|
||||
|
||||
---
|
||||
|
||||
## Understanding Automatic Dependency Resolution
|
||||
|
||||
### How It Works
|
||||
|
||||
In v0.58.0+, the server uses smart dependency resolution:
|
||||
|
||||
```python
|
||||
# In multi-user modes (OAuth, Multi-User BasicAuth):
|
||||
if ENABLE_SEMANTIC_SEARCH == true:
|
||||
background_operations = automatically enabled
|
||||
refresh_tokens = automatically requested
|
||||
token_storage = required (TOKEN_ENCRYPTION_KEY, TOKEN_STORAGE_DB)
|
||||
oauth_credentials = required (for app password retrieval)
|
||||
```
|
||||
|
||||
**What this means:**
|
||||
- ✅ Set `ENABLE_SEMANTIC_SEARCH=true`
|
||||
- ✅ Provide required infrastructure (Qdrant, Ollama, encryption key)
|
||||
- ✅ System automatically enables background operations
|
||||
- ❌ No need to set `ENABLE_BACKGROUND_OPERATIONS` separately
|
||||
|
||||
### When Automatic Enablement Happens
|
||||
|
||||
| Deployment Mode | Semantic Search Enabled | Background Operations Auto-Enabled? |
|
||||
|----------------|------------------------|-----------------------------------|
|
||||
| Single-User BasicAuth | ✅ | ❌ No (not needed) |
|
||||
| Multi-User BasicAuth | ✅ | ✅ Yes |
|
||||
| OAuth Single-Audience | ✅ | ✅ Yes |
|
||||
| OAuth Token Exchange | ✅ | ✅ Yes |
|
||||
| Smithery Stateless | N/A (not supported) | N/A |
|
||||
|
||||
### When to Explicitly Set ENABLE_BACKGROUND_OPERATIONS
|
||||
|
||||
Only needed when you want background operations **without** semantic search:
|
||||
|
||||
```bash
|
||||
# Example: OAuth mode with background operations but NO semantic search
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# Explicitly enable background operations for future features
|
||||
ENABLE_BACKGROUND_OPERATIONS=true
|
||||
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
# Semantic search disabled
|
||||
ENABLE_SEMANTIC_SEARCH=false
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Explicit Mode Selection
|
||||
|
||||
### Why Use MCP_DEPLOYMENT_MODE?
|
||||
|
||||
**Benefits:**
|
||||
- ✅ Removes ambiguity about which mode is active
|
||||
- ✅ Validation errors reference specific mode requirements
|
||||
- ✅ Catches configuration mistakes early
|
||||
- ✅ Self-documenting configuration
|
||||
|
||||
**Example:**
|
||||
```bash
|
||||
# Without explicit mode:
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
# Is this OAuth or Multi-User BasicAuth? Not immediately clear.
|
||||
|
||||
# With explicit mode:
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
# Clear: This is OAuth mode
|
||||
```
|
||||
|
||||
### Valid Mode Values
|
||||
|
||||
| Mode Value | Description |
|
||||
|-----------|-------------|
|
||||
| `single_user_basic` | Single-user with username/password |
|
||||
| `multi_user_basic` | Multi-user with BasicAuth pass-through |
|
||||
| `oauth_single_audience` | Multi-user OAuth (recommended) |
|
||||
| `oauth_token_exchange` | Multi-user OAuth with token exchange |
|
||||
| `smithery` | Smithery platform deployment |
|
||||
|
||||
### Mode Detection Priority
|
||||
|
||||
When `MCP_DEPLOYMENT_MODE` is set:
|
||||
1. ✅ Explicit mode is used
|
||||
2. ✅ Server validates configuration matches explicit mode
|
||||
3. ❌ Auto-detection is skipped
|
||||
|
||||
When `MCP_DEPLOYMENT_MODE` is NOT set:
|
||||
1. ✅ Auto-detection runs (existing behavior)
|
||||
2. ✅ Priority: Smithery → Token Exchange → Multi-User BasicAuth → Single-User BasicAuth → OAuth Single-Audience
|
||||
|
||||
---
|
||||
|
||||
## Validation and Error Messages
|
||||
|
||||
### Old Validation (v0.57.x)
|
||||
|
||||
```
|
||||
Error: [multi_user_basic] ENABLE_OFFLINE_ACCESS is required when VECTOR_SYNC_ENABLED is enabled
|
||||
```
|
||||
|
||||
**Problem:** User must understand internal dependency relationship
|
||||
|
||||
### New Validation (v0.58.0+)
|
||||
|
||||
```
|
||||
Error: [multi_user_basic] TOKEN_ENCRYPTION_KEY is required when ENABLE_SEMANTIC_SEARCH is enabled
|
||||
```
|
||||
|
||||
**Benefit:** Clear what's needed, no mention of internal ENABLE_BACKGROUND_OPERATIONS flag
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting Migration
|
||||
|
||||
### Issue: Deprecation Warning After Migration
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
WARNING: VECTOR_SYNC_ENABLED is deprecated. Please use ENABLE_SEMANTIC_SEARCH instead.
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
1. Check for `VECTOR_SYNC_ENABLED` in `.env` file
|
||||
2. Replace with `ENABLE_SEMANTIC_SEARCH`
|
||||
3. Search for any scripts/CI configs using old name
|
||||
4. Restart server
|
||||
|
||||
### Issue: Both Old and New Names Set
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
WARNING: Both ENABLE_SEMANTIC_SEARCH and VECTOR_SYNC_ENABLED are set. Using ENABLE_SEMANTIC_SEARCH.
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
1. Remove `VECTOR_SYNC_ENABLED` from `.env`
|
||||
2. Keep `ENABLE_SEMANTIC_SEARCH`
|
||||
3. Restart server
|
||||
|
||||
### Issue: Missing Required Dependencies
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: [oauth_single_audience] TOKEN_ENCRYPTION_KEY is required when ENABLE_SEMANTIC_SEARCH is enabled
|
||||
```
|
||||
|
||||
**Solution:**
|
||||
When semantic search is enabled in multi-user modes, you need:
|
||||
- `TOKEN_ENCRYPTION_KEY` - Generate with: `python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"`
|
||||
- `TOKEN_STORAGE_DB` - Path to SQLite database (e.g., `/app/data/tokens.db`)
|
||||
- `NEXTCLOUD_OIDC_CLIENT_ID` and `NEXTCLOUD_OIDC_CLIENT_SECRET` - For app password retrieval
|
||||
|
||||
### Issue: Unexpected Mode Detected
|
||||
|
||||
**Symptom:**
|
||||
Server activates `oauth_single_audience` mode when you expected `multi_user_basic`
|
||||
|
||||
**Solution:**
|
||||
Add explicit mode declaration:
|
||||
```bash
|
||||
MCP_DEPLOYMENT_MODE=multi_user_basic
|
||||
ENABLE_MULTI_USER_BASIC_AUTH=true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing Your Migration
|
||||
|
||||
### Step 1: Verify Configuration
|
||||
|
||||
```bash
|
||||
# Set new variable names in .env
|
||||
cat .env | grep -E "(ENABLE_SEMANTIC_SEARCH|ENABLE_BACKGROUND_OPERATIONS|MCP_DEPLOYMENT_MODE)"
|
||||
```
|
||||
|
||||
### Step 2: Check for Old Variable Names
|
||||
|
||||
```bash
|
||||
# Should return nothing after migration
|
||||
cat .env | grep -E "(VECTOR_SYNC_ENABLED|ENABLE_OFFLINE_ACCESS)"
|
||||
```
|
||||
|
||||
### Step 3: Start Server and Check Logs
|
||||
|
||||
```bash
|
||||
# Start server
|
||||
docker-compose up mcp
|
||||
|
||||
# Look for:
|
||||
# 1. No deprecation warnings
|
||||
# 2. Correct mode detected
|
||||
# 3. Auto-enablement messages (if using semantic search in multi-user mode)
|
||||
```
|
||||
|
||||
**Expected Log Output (Multi-User OAuth + Semantic Search):**
|
||||
```
|
||||
INFO: Using explicit deployment mode: oauth_single_audience
|
||||
INFO: Automatically enabled background operations for semantic search in multi-user mode.
|
||||
INFO: Vector sync enabled. Starting background scanner...
|
||||
```
|
||||
|
||||
### Step 4: Verify Functionality
|
||||
|
||||
Test that existing features still work:
|
||||
- [ ] Semantic search returns results
|
||||
- [ ] Background indexing runs
|
||||
- [ ] OAuth flow completes successfully
|
||||
- [ ] Refresh tokens are stored/retrieved
|
||||
|
||||
---
|
||||
|
||||
## Quick Start Templates
|
||||
|
||||
We provide mode-specific templates for new deployments:
|
||||
|
||||
| Template | Use Case |
|
||||
|----------|----------|
|
||||
| `env.sample.single-user` | Simplest setup |
|
||||
| `env.sample.oauth-multi-user` | Recommended multi-user |
|
||||
| `env.sample.oauth-advanced` | Token exchange mode |
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
cp env.sample.oauth-multi-user .env
|
||||
# Edit .env with your values
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Timeline and Support
|
||||
|
||||
| Version | Status | Old Variable Support |
|
||||
|---------|--------|---------------------|
|
||||
| v0.57.x | Stable | Old names only |
|
||||
| v0.58.0 | Current | Both old and new (with warnings) |
|
||||
| v1.0.0 | Breaking | New names only |
|
||||
|
||||
**Recommendation:** Migrate before v1.0.0 (12+ months minimum)
|
||||
|
||||
---
|
||||
|
||||
## Getting Help
|
||||
|
||||
If you encounter issues during migration:
|
||||
|
||||
1. **Check the logs** - Look for deprecation warnings and error messages
|
||||
2. **Review ADR-021** - See [docs/ADR-021-configuration-consolidation.md](ADR-021-configuration-consolidation.md)
|
||||
3. **Use mode-specific templates** - See `env.sample.*` files
|
||||
4. **File an issue** - Include your `.env` (redacted), logs, and mode
|
||||
|
||||
---
|
||||
|
||||
## Summary
|
||||
|
||||
**What You Need to Do:**
|
||||
1. ✅ Rename `VECTOR_SYNC_ENABLED` → `ENABLE_SEMANTIC_SEARCH`
|
||||
2. ✅ (Optional) Rename `ENABLE_OFFLINE_ACCESS` → `ENABLE_BACKGROUND_OPERATIONS`
|
||||
3. ✅ (Recommended) Add `MCP_DEPLOYMENT_MODE` for clarity
|
||||
4. ✅ Remove redundant settings (semantic search auto-enables background ops in multi-user modes)
|
||||
5. ✅ Test your configuration
|
||||
|
||||
**What the Server Does Automatically:**
|
||||
- ✅ Supports both old and new variable names
|
||||
- ✅ Logs deprecation warnings for old names
|
||||
- ✅ Auto-enables background operations when semantic search is enabled in multi-user modes
|
||||
- ✅ Validates configuration and provides clear error messages
|
||||
|
||||
**Migration Timeline:**
|
||||
- Now → v1.0.0: Both old and new names work
|
||||
- v1.0.0+: Only new names supported
|
||||
|
||||
**Questions?** See [docs/configuration.md](configuration.md) or file an issue.
|
||||
+15
-129
@@ -2,82 +2,25 @@
|
||||
|
||||
The Nextcloud MCP server requires configuration to connect to your Nextcloud instance. Configuration is provided through environment variables, typically stored in a `.env` file.
|
||||
|
||||
> **Note:** Configuration was significantly simplified in v0.58.0. If you're upgrading from v0.57.x, see the [Configuration Migration Guide](configuration-migration-v2.md).
|
||||
|
||||
## Quick Start
|
||||
|
||||
We provide mode-specific configuration templates for quick setup:
|
||||
Create a `.env` file based on `env.sample`:
|
||||
|
||||
```bash
|
||||
# Choose a template based on your deployment mode:
|
||||
cp env.sample.single-user .env # Simplest - one user, local dev
|
||||
cp env.sample.oauth-multi-user .env # Recommended - multi-user OAuth
|
||||
cp env.sample.oauth-advanced .env # Advanced - token exchange mode
|
||||
|
||||
# Or start from the full example:
|
||||
cp env.sample .env
|
||||
|
||||
# Edit .env with your Nextcloud details
|
||||
```
|
||||
|
||||
Then choose your deployment mode:
|
||||
Then choose your authentication mode:
|
||||
|
||||
- [Single-User BasicAuth](#single-user-basicauth-mode) - Simplest for personal instances
|
||||
- [Multi-User OAuth](#multi-user-oauth-modes) - Recommended for production
|
||||
- [Deployment Mode Selection](#deployment-mode-selection) - Explicit mode declaration
|
||||
- [OAuth2/OIDC Configuration](#oauth2oidc-configuration) (Recommended)
|
||||
- [Basic Authentication Configuration](#basic-authentication-legacy)
|
||||
|
||||
---
|
||||
|
||||
## Deployment Mode Selection
|
||||
## OAuth2/OIDC Configuration
|
||||
|
||||
**New in v0.58.0:** You can explicitly declare your deployment mode to remove ambiguity and catch configuration errors early.
|
||||
|
||||
```dotenv
|
||||
# Optional but recommended
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
```
|
||||
|
||||
**Valid values:**
|
||||
- `single_user_basic` - Single-user with username/password
|
||||
- `multi_user_basic` - Multi-user with BasicAuth pass-through
|
||||
- `oauth_single_audience` - Multi-user OAuth (recommended)
|
||||
- `oauth_token_exchange` - Multi-user OAuth with token exchange
|
||||
- `smithery` - Smithery platform deployment
|
||||
|
||||
**Benefits:**
|
||||
- ✅ Clear which mode is active
|
||||
- ✅ Better validation error messages
|
||||
- ✅ Self-documenting configuration
|
||||
- ✅ Catches configuration mistakes early
|
||||
|
||||
**Auto-detection:** If `MCP_DEPLOYMENT_MODE` is not set, the server auto-detects the mode based on other settings (existing behavior).
|
||||
|
||||
See [Authentication Modes](authentication.md) for detailed comparison of deployment modes.
|
||||
|
||||
---
|
||||
|
||||
## Single-User BasicAuth Mode
|
||||
|
||||
BasicAuth with a single user is the simplest deployment mode. Use for personal instances, local development, and testing.
|
||||
|
||||
```dotenv
|
||||
# Minimal single-user configuration
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password
|
||||
|
||||
# Optional: Explicit mode declaration
|
||||
MCP_DEPLOYMENT_MODE=single_user_basic
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
> **Security Notice:** BasicAuth stores credentials in environment variables and is less secure than OAuth. Use OAuth for production multi-user deployments.
|
||||
|
||||
---
|
||||
|
||||
## Multi-User OAuth Modes
|
||||
|
||||
OAuth2/OIDC is the recommended authentication mode for production multi-user deployments.
|
||||
OAuth2/OIDC is the recommended authentication mode for production deployments.
|
||||
|
||||
### Minimal Configuration (Auto-registration)
|
||||
|
||||
@@ -85,9 +28,6 @@ OAuth2/OIDC is the recommended authentication mode for production multi-user dep
|
||||
# .env file for OAuth with auto-registration
|
||||
NEXTCLOUD_HOST=https://your.nextcloud.instance.com
|
||||
|
||||
# Optional: Explicit mode declaration (recommended)
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# Leave these EMPTY for OAuth mode
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
@@ -101,9 +41,6 @@ This minimal configuration uses dynamic client registration to automatically reg
|
||||
# .env file for OAuth with pre-configured client
|
||||
NEXTCLOUD_HOST=https://your.nextcloud.instance.com
|
||||
|
||||
# Optional: Explicit mode declaration (recommended)
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# OAuth Client Credentials (optional - auto-registers if not provided)
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=your-client-id
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
|
||||
@@ -173,50 +110,8 @@ NEXTCLOUD_PASSWORD=your_app_password_or_password
|
||||
|
||||
## Semantic Search Configuration (Optional)
|
||||
|
||||
**New in v0.58.0:** Simplified semantic search configuration with automatic dependency resolution.
|
||||
|
||||
The MCP server includes semantic search capabilities powered by vector embeddings. This feature requires a vector database (Qdrant) and an embedding service.
|
||||
|
||||
### Quick Start
|
||||
|
||||
**Single-User Mode:**
|
||||
```dotenv
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password
|
||||
|
||||
# Enable semantic search
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
|
||||
# Vector database
|
||||
QDRANT_LOCATION=:memory:
|
||||
|
||||
# Embedding provider
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
```
|
||||
|
||||
**Multi-User OAuth Mode:**
|
||||
```dotenv
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# Enable semantic search
|
||||
# In multi-user modes, this AUTOMATICALLY enables background operations!
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
|
||||
# Required for background operations (auto-enabled by semantic search)
|
||||
TOKEN_ENCRYPTION_KEY=your-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
# Vector database
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
|
||||
# Embedding provider
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
```
|
||||
|
||||
> **Note:** In multi-user modes (OAuth, Multi-User BasicAuth), enabling `ENABLE_SEMANTIC_SEARCH` automatically enables background operations and refresh token storage. You don't need to set `ENABLE_BACKGROUND_OPERATIONS` separately!
|
||||
|
||||
### Qdrant Vector Database Modes
|
||||
|
||||
The server supports three Qdrant deployment modes:
|
||||
@@ -231,7 +126,7 @@ No configuration needed! If neither `QDRANT_URL` nor `QDRANT_LOCATION` is set, t
|
||||
|
||||
```dotenv
|
||||
# No Qdrant configuration needed - defaults to :memory:
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
@@ -250,7 +145,7 @@ For single-instance deployments that need persistence without a separate Qdrant
|
||||
```dotenv
|
||||
# Local persistent storage
|
||||
QDRANT_LOCATION=/app/data/qdrant # Or any writable path
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
@@ -271,7 +166,7 @@ For production deployments with a dedicated Qdrant service:
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
QDRANT_API_KEY=your-secret-api-key # Optional
|
||||
QDRANT_COLLECTION=nextcloud_content # Optional
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
```
|
||||
|
||||
**Pros:**
|
||||
@@ -388,15 +283,13 @@ Solutions:
|
||||
- Data corruption in Qdrant
|
||||
- Confusing error messages during indexing
|
||||
|
||||
### Background Indexing Configuration
|
||||
### Vector Sync Configuration
|
||||
|
||||
Control background indexing behavior:
|
||||
|
||||
```dotenv
|
||||
# Semantic search (ADR-007, ADR-021)
|
||||
ENABLE_SEMANTIC_SEARCH=true # Enable background indexing
|
||||
|
||||
# Tuning parameters (advanced - only modify if needed)
|
||||
# Vector sync settings (ADR-007)
|
||||
VECTOR_SYNC_ENABLED=true # Enable background indexing
|
||||
VECTOR_SYNC_SCAN_INTERVAL=300 # Scan interval in seconds (default: 5 minutes)
|
||||
VECTOR_SYNC_PROCESSOR_WORKERS=3 # Concurrent indexing workers (default: 3)
|
||||
VECTOR_SYNC_QUEUE_MAX_SIZE=10000 # Max queued documents (default: 10000)
|
||||
@@ -406,8 +299,6 @@ DOCUMENT_CHUNK_SIZE=512 # Words per chunk (default: 512)
|
||||
DOCUMENT_CHUNK_OVERLAP=50 # Overlapping words between chunks (default: 50)
|
||||
```
|
||||
|
||||
> **Note:** The `VECTOR_SYNC_*` tuning parameters keep their names as they're implementation details. Only the user-facing feature flag was renamed to `ENABLE_SEMANTIC_SEARCH`.
|
||||
|
||||
### Embedding Service Configuration
|
||||
|
||||
The server uses an embedding service to generate vector representations. Two options are available:
|
||||
@@ -478,11 +369,11 @@ DOCUMENT_CHUNK_OVERLAP=100
|
||||
|
||||
| Variable | Required | Default | Description |
|
||||
|----------|----------|---------|-------------|
|
||||
| `ENABLE_SEMANTIC_SEARCH` | ⚠️ Optional | `false` | Enable semantic search with background indexing (replaces `VECTOR_SYNC_ENABLED`) |
|
||||
| `QDRANT_URL` | ⚠️ Optional | - | Qdrant service URL (network mode) - mutually exclusive with `QDRANT_LOCATION` |
|
||||
| `QDRANT_LOCATION` | ⚠️ Optional | `:memory:` | Local Qdrant path (`:memory:` or `/path/to/data`) - mutually exclusive with `QDRANT_URL` |
|
||||
| `QDRANT_API_KEY` | ⚠️ Optional | - | Qdrant API key (network mode only) |
|
||||
| `QDRANT_COLLECTION` | ⚠️ Optional | Auto-generated | Qdrant collection name |
|
||||
| `QDRANT_COLLECTION` | ⚠️ Optional | `nextcloud_content` | Qdrant collection name |
|
||||
| `VECTOR_SYNC_ENABLED` | ⚠️ Optional | `false` | Enable background vector indexing |
|
||||
| `VECTOR_SYNC_SCAN_INTERVAL` | ⚠️ Optional | `300` | Document scan interval (seconds) |
|
||||
| `VECTOR_SYNC_PROCESSOR_WORKERS` | ⚠️ Optional | `3` | Concurrent indexing workers |
|
||||
| `VECTOR_SYNC_QUEUE_MAX_SIZE` | ⚠️ Optional | `10000` | Max queued documents |
|
||||
@@ -492,9 +383,6 @@ DOCUMENT_CHUNK_OVERLAP=100
|
||||
| `DOCUMENT_CHUNK_SIZE` | ⚠️ Optional | `512` | Words per chunk for document embedding |
|
||||
| `DOCUMENT_CHUNK_OVERLAP` | ⚠️ Optional | `50` | Overlapping words between chunks (must be < chunk size) |
|
||||
|
||||
**Deprecated variables (still functional):**
|
||||
- `VECTOR_SYNC_ENABLED` - Use `ENABLE_SEMANTIC_SEARCH` instead (will be removed in v1.0.0)
|
||||
|
||||
### Docker Compose Example
|
||||
|
||||
Enable network mode Qdrant with docker-compose:
|
||||
@@ -504,7 +392,7 @@ services:
|
||||
mcp:
|
||||
environment:
|
||||
- QDRANT_URL=http://qdrant:6333
|
||||
- ENABLE_SEMANTIC_SEARCH=true
|
||||
- VECTOR_SYNC_ENABLED=true
|
||||
|
||||
qdrant:
|
||||
image: qdrant/qdrant:latest
|
||||
@@ -657,7 +545,6 @@ uv run nextcloud-mcp-server --no-oauth \
|
||||
|
||||
## See Also
|
||||
|
||||
- [Configuration Migration Guide v2](configuration-migration-v2.md) - **New in v0.58.0:** Migrate from old variable names
|
||||
- [OAuth Quick Start](quickstart-oauth.md) - 5-minute OAuth setup for development
|
||||
- [OAuth Setup Guide](oauth-setup.md) - Detailed OAuth configuration for production
|
||||
- [OAuth Architecture](oauth-architecture.md) - How OAuth works in the MCP server
|
||||
@@ -666,4 +553,3 @@ uv run nextcloud-mcp-server --no-oauth \
|
||||
- [Running the Server](running.md) - Starting the server with different configurations
|
||||
- [Troubleshooting](troubleshooting.md) - Common configuration issues
|
||||
- [OAuth Troubleshooting](oauth-troubleshooting.md) - OAuth-specific troubleshooting
|
||||
- [ADR-021](ADR-021-configuration-consolidation.md) - Configuration consolidation architecture decision
|
||||
|
||||
@@ -1,301 +0,0 @@
|
||||
# Database Migrations
|
||||
|
||||
This document describes the database migration system for nextcloud-mcp-server's token storage database.
|
||||
|
||||
## Overview
|
||||
|
||||
The token storage database uses [Alembic](https://alembic.sqlalchemy.org/) for schema versioning and migrations. Alembic provides:
|
||||
|
||||
- **Version Control**: Track schema changes in Git
|
||||
- **Rollback Support**: Safely downgrade schema if needed
|
||||
- **Audit Trail**: Migration files serve as schema changelog
|
||||
- **Automated Upgrades**: Database schema updates automatically on startup
|
||||
|
||||
## Architecture
|
||||
|
||||
### Migration Strategy
|
||||
|
||||
The system handles three scenarios:
|
||||
|
||||
1. **New Database**: Runs migrations from scratch to create all tables
|
||||
2. **Pre-Alembic Database**: Stamps existing database with initial revision (no changes)
|
||||
3. **Alembic-Managed Database**: Upgrades to latest version automatically
|
||||
|
||||
### Directory Structure
|
||||
|
||||
```
|
||||
nextcloud-mcp-server/
|
||||
├── alembic/ # Alembic migrations
|
||||
│ ├── versions/ # Migration scripts
|
||||
│ │ └── 20251217_2200_001_initial_schema.py
|
||||
│ ├── env.py # Alembic environment
|
||||
│ ├── script.py.mako # Migration template
|
||||
│ └── README # Migration usage guide
|
||||
├── alembic.ini # Alembic configuration
|
||||
└── nextcloud_mcp_server/
|
||||
├── auth/storage.py # Uses migrations on init
|
||||
└── migrations.py # Migration utilities
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Automatic Migration on Startup
|
||||
|
||||
Migrations run automatically when the server starts:
|
||||
|
||||
```bash
|
||||
uv run nextcloud-mcp-server
|
||||
```
|
||||
|
||||
The `RefreshTokenStorage.initialize()` method:
|
||||
1. Checks if database is Alembic-managed
|
||||
2. Stamps pre-Alembic databases with initial revision
|
||||
3. Upgrades to latest version
|
||||
|
||||
### Manual Migration Commands
|
||||
|
||||
```bash
|
||||
# Show current database version
|
||||
uv run nextcloud-mcp-server db current
|
||||
|
||||
# Upgrade database to latest version
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Show migration history
|
||||
uv run nextcloud-mcp-server db history
|
||||
|
||||
# Downgrade by one version (emergency use only)
|
||||
uv run nextcloud-mcp-server db downgrade
|
||||
|
||||
# Specify custom database path
|
||||
uv run nextcloud-mcp-server db current -d /path/to/tokens.db
|
||||
```
|
||||
|
||||
### Environment Variables
|
||||
|
||||
- `TOKEN_STORAGE_DB`: Path to database file (default: `/app/data/tokens.db`)
|
||||
|
||||
## Creating Migrations (Developers)
|
||||
|
||||
### Step 1: Create Migration File
|
||||
|
||||
```bash
|
||||
uv run nextcloud-mcp-server db migrate "add user preferences table"
|
||||
```
|
||||
|
||||
This creates a new migration file in `alembic/versions/` with empty `upgrade()` and `downgrade()` functions.
|
||||
|
||||
### Step 2: Write Migration SQL
|
||||
|
||||
Since we don't use SQLAlchemy models, write raw SQL:
|
||||
|
||||
```python
|
||||
def upgrade() -> None:
|
||||
"""Add user preferences table."""
|
||||
op.execute("""
|
||||
CREATE TABLE user_preferences (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
theme TEXT DEFAULT 'light',
|
||||
language TEXT DEFAULT 'en',
|
||||
created_at INTEGER NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
op.execute("""
|
||||
CREATE INDEX idx_user_preferences_user_id
|
||||
ON user_preferences(user_id)
|
||||
""")
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Remove user preferences table."""
|
||||
op.execute("DROP INDEX IF EXISTS idx_user_preferences_user_id")
|
||||
op.execute("DROP TABLE IF EXISTS user_preferences")
|
||||
```
|
||||
|
||||
### Step 3: Test Migration
|
||||
|
||||
```bash
|
||||
# Test upgrade
|
||||
uv run nextcloud-mcp-server db upgrade -d /tmp/test.db
|
||||
|
||||
# Verify schema
|
||||
sqlite3 /tmp/test.db ".schema"
|
||||
|
||||
# Test downgrade
|
||||
uv run nextcloud-mcp-server db downgrade -d /tmp/test.db
|
||||
|
||||
# Verify removal
|
||||
sqlite3 /tmp/test.db ".schema"
|
||||
```
|
||||
|
||||
### Step 4: Commit Migration
|
||||
|
||||
```bash
|
||||
git add alembic/versions/YYYYMMDD_HHMM_XXX_description.py
|
||||
git commit -m "feat: add user preferences table migration"
|
||||
```
|
||||
|
||||
## SQLite Limitations
|
||||
|
||||
SQLite has limited `ALTER TABLE` support:
|
||||
|
||||
### Supported Operations
|
||||
|
||||
- ✅ Add columns: `ALTER TABLE table ADD COLUMN ...`
|
||||
- ✅ Rename table: `ALTER TABLE old RENAME TO new`
|
||||
- ✅ Rename column: `ALTER TABLE table RENAME COLUMN old TO new` (SQLite 3.25+)
|
||||
|
||||
### Unsupported Operations (Requires Table Recreation)
|
||||
|
||||
- ❌ Drop column
|
||||
- ❌ Change column type
|
||||
- ❌ Add constraints to existing columns
|
||||
|
||||
### Table Recreation Pattern
|
||||
|
||||
For complex schema changes:
|
||||
|
||||
```python
|
||||
def upgrade() -> None:
|
||||
# Create new table with desired schema
|
||||
op.execute("""
|
||||
CREATE TABLE refresh_tokens_new (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
encrypted_token BLOB NOT NULL,
|
||||
new_field TEXT, -- New column
|
||||
expires_at INTEGER,
|
||||
created_at INTEGER NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
# Copy data from old table
|
||||
op.execute("""
|
||||
INSERT INTO refresh_tokens_new
|
||||
(user_id, encrypted_token, expires_at, created_at)
|
||||
SELECT user_id, encrypted_token, expires_at, created_at
|
||||
FROM refresh_tokens
|
||||
""")
|
||||
|
||||
# Drop old table and rename new table
|
||||
op.execute("DROP TABLE refresh_tokens")
|
||||
op.execute("ALTER TABLE refresh_tokens_new RENAME TO refresh_tokens")
|
||||
|
||||
# Recreate indexes
|
||||
op.execute("CREATE INDEX idx_user_id ON refresh_tokens(user_id)")
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Naming Conventions
|
||||
|
||||
- **Migrations**: `YYYYMMDD_HHMM_XXX_description.py`
|
||||
- **Revision IDs**: Sequential numbers (`001`, `002`, `003`)
|
||||
- **Descriptions**: Imperative mood ("add table", "remove column")
|
||||
|
||||
### Migration Guidelines
|
||||
|
||||
1. **Test Thoroughly**: Test both upgrade and downgrade paths
|
||||
2. **Preserve Data**: Ensure data migration logic is correct
|
||||
3. **Document Changes**: Add comments explaining complex operations
|
||||
4. **Small Changes**: One logical change per migration
|
||||
5. **No Breaking Changes**: Maintain backward compatibility when possible
|
||||
|
||||
### Downgrade Considerations
|
||||
|
||||
- **Data Loss**: Downgrade may lose data (dropped columns, tables)
|
||||
- **Confirmation**: Downgrade command requires explicit confirmation
|
||||
- **Testing**: Always test downgrade path before deploying
|
||||
- **Emergency Only**: Use downgrades only for critical rollbacks
|
||||
|
||||
## Backward Compatibility
|
||||
|
||||
### Pre-Alembic Databases
|
||||
|
||||
Existing databases created before Alembic integration are automatically detected and stamped with revision `001`:
|
||||
|
||||
1. Server detects no `alembic_version` table
|
||||
2. Checks if `refresh_tokens` table exists
|
||||
3. If yes, stamps database with `001` (no schema changes)
|
||||
4. Future updates use normal migration path
|
||||
|
||||
### Migration Path
|
||||
|
||||
```
|
||||
Pre-Alembic DB → Stamp(001) → Upgrade(002) → Upgrade(003) → ...
|
||||
New DB → Migrate(001) → Upgrade(002) → Upgrade(003) → ...
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Migration Fails
|
||||
|
||||
```bash
|
||||
# Check current state
|
||||
uv run nextcloud-mcp-server db current -d /path/to/tokens.db
|
||||
|
||||
# View migration history
|
||||
uv run nextcloud-mcp-server db history -d /path/to/tokens.db
|
||||
|
||||
# Manually inspect database
|
||||
sqlite3 /path/to/tokens.db ".schema"
|
||||
```
|
||||
|
||||
### Reset to Initial State
|
||||
|
||||
**WARNING: This destroys all data!**
|
||||
|
||||
```bash
|
||||
# Downgrade to base (empty database)
|
||||
uv run nextcloud-mcp-server db downgrade -d /path/to/tokens.db --revision base
|
||||
|
||||
# Upgrade to latest
|
||||
uv run nextcloud-mcp-server db upgrade -d /path/to/tokens.db
|
||||
```
|
||||
|
||||
### Corrupted Migration State
|
||||
|
||||
If `alembic_version` table is corrupted:
|
||||
|
||||
```bash
|
||||
# Manually fix via SQL
|
||||
sqlite3 /path/to/tokens.db
|
||||
> DELETE FROM alembic_version;
|
||||
> INSERT INTO alembic_version (version_num) VALUES ('001');
|
||||
> .quit
|
||||
|
||||
# Verify and upgrade
|
||||
uv run nextcloud-mcp-server db current -d /path/to/tokens.db
|
||||
uv run nextcloud-mcp-server db upgrade -d /path/to/tokens.db
|
||||
```
|
||||
|
||||
## CI/CD Integration
|
||||
|
||||
### Pre-Deployment
|
||||
|
||||
```bash
|
||||
# Run migrations in test environment
|
||||
export TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Verify current version
|
||||
uv run nextcloud-mcp-server db current
|
||||
```
|
||||
|
||||
### Docker Deployment
|
||||
|
||||
Migrations run automatically on container startup via `RefreshTokenStorage.initialize()`.
|
||||
|
||||
### Rollback Plan
|
||||
|
||||
1. Stop application
|
||||
2. Backup database: `cp tokens.db tokens.db.backup`
|
||||
3. Downgrade: `uv run nextcloud-mcp-server db downgrade --revision XXX`
|
||||
4. Deploy previous application version
|
||||
5. Restart application
|
||||
|
||||
## References
|
||||
|
||||
- [Alembic Documentation](https://alembic.sqlalchemy.org/)
|
||||
- [SQLite ALTER TABLE Limitations](https://www.sqlite.org/lang_altertable.html)
|
||||
- [ADR-004: Progressive Consent](./ADR-004-progressive-consent.md) (migration 001)
|
||||
+196
-186
@@ -14,10 +14,100 @@ Before running the server:
|
||||
|
||||
## Quick Start
|
||||
|
||||
Start the server using Docker:
|
||||
Load your environment variables and start the server:
|
||||
|
||||
```bash
|
||||
# OAuth mode (recommended)
|
||||
# Load environment variables from .env
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Start the server
|
||||
uv run nextcloud-mcp-server
|
||||
```
|
||||
|
||||
The server will start on `http://127.0.0.1:8000` by default.
|
||||
|
||||
---
|
||||
|
||||
## Running Locally
|
||||
|
||||
### Method 1: Using nextcloud-mcp-server CLI (Recommended)
|
||||
|
||||
The CLI provides a simple interface with built-in defaults:
|
||||
|
||||
#### OAuth Mode
|
||||
|
||||
```bash
|
||||
# Auto-detected when NEXTCLOUD_USERNAME/PASSWORD not set
|
||||
uv run nextcloud-mcp-server
|
||||
|
||||
# Explicitly force OAuth mode
|
||||
uv run nextcloud-mcp-server --oauth
|
||||
|
||||
# OAuth with custom host and port
|
||||
uv run nextcloud-mcp-server --oauth --host 0.0.0.0 --port 8080
|
||||
|
||||
# OAuth with pre-configured client
|
||||
uv run nextcloud-mcp-server --oauth \
|
||||
--oauth-client-id abc123 \
|
||||
--oauth-client-secret xyz789
|
||||
|
||||
# OAuth with specific apps only
|
||||
uv run nextcloud-mcp-server --oauth \
|
||||
--enable-app notes \
|
||||
--enable-app calendar
|
||||
```
|
||||
|
||||
#### BasicAuth Mode (Legacy)
|
||||
|
||||
```bash
|
||||
# Auto-detected when NEXTCLOUD_USERNAME/PASSWORD are set
|
||||
uv run nextcloud-mcp-server
|
||||
|
||||
# Explicitly force BasicAuth mode
|
||||
uv run nextcloud-mcp-server --no-oauth
|
||||
|
||||
# BasicAuth with specific apps
|
||||
uv run nextcloud-mcp-server --no-oauth \
|
||||
--enable-app notes \
|
||||
--enable-app webdav
|
||||
```
|
||||
|
||||
### Method 2: Using uvicorn
|
||||
|
||||
For more control over server options (workers, reload, etc.):
|
||||
|
||||
```bash
|
||||
# Load environment variables
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Run with uvicorn
|
||||
uv run uvicorn nextcloud_mcp_server.app:get_app \
|
||||
--factory \
|
||||
--host 127.0.0.1 \
|
||||
--port 8000 \
|
||||
--reload # Enable auto-reload for development
|
||||
```
|
||||
|
||||
See all uvicorn options at [https://www.uvicorn.org/settings/](https://www.uvicorn.org/settings/)
|
||||
|
||||
### Method 3: Using Python Module
|
||||
|
||||
```bash
|
||||
# Load environment variables
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Run as Python module
|
||||
python -m nextcloud_mcp_server.app --oauth --port 8000
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Running with Docker
|
||||
|
||||
### Basic Docker Run
|
||||
|
||||
```bash
|
||||
# OAuth mode
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
@@ -26,56 +116,11 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
```
|
||||
|
||||
The server will start on `http://127.0.0.1:8000` by default.
|
||||
|
||||
---
|
||||
|
||||
## Running with Docker
|
||||
|
||||
### Basic Docker Run
|
||||
|
||||
#### OAuth Mode (Recommended)
|
||||
### Docker with Persistent OAuth Storage
|
||||
|
||||
```bash
|
||||
# OAuth with auto-registration
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# OAuth with custom port
|
||||
docker run -p 127.0.0.1:8080:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# OAuth with pre-configured client
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
-e NEXTCLOUD_OIDC_CLIENT_ID=abc123 \
|
||||
-e NEXTCLOUD_OIDC_CLIENT_SECRET=xyz789 \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
|
||||
# OAuth with specific apps only
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app notes --enable-app calendar
|
||||
```
|
||||
|
||||
#### BasicAuth Mode (Legacy)
|
||||
|
||||
```bash
|
||||
# BasicAuth (requires NEXTCLOUD_USERNAME/PASSWORD in .env)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
|
||||
# BasicAuth with specific apps
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest \
|
||||
--enable-app notes --enable-app webdav
|
||||
```
|
||||
|
||||
### Docker with Persistent Token Storage
|
||||
|
||||
```bash
|
||||
# Mount volume for persistent OAuth token storage
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env \
|
||||
-v $(pwd)/data:/app/data \
|
||||
-v $(pwd)/.oauth:/app/.oauth \
|
||||
--rm ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
```
|
||||
|
||||
@@ -95,7 +140,7 @@ services:
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./data:/app/data # Persistent token storage
|
||||
- ./oauth-storage:/app/.oauth
|
||||
restart: unless-stopped
|
||||
```
|
||||
|
||||
@@ -123,39 +168,30 @@ docker-compose down
|
||||
|
||||
```bash
|
||||
# Bind to all interfaces (accessible from network)
|
||||
docker run -p 0.0.0.0:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
uv run nextcloud-mcp-server --host 0.0.0.0 --port 8000
|
||||
|
||||
# Bind to localhost only (default, more secure)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
uv run nextcloud-mcp-server --host 127.0.0.1 --port 8000
|
||||
|
||||
# Use a different port (map host port 8080 to container port 8000)
|
||||
docker run -p 127.0.0.1:8080:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
# Use a different port
|
||||
uv run nextcloud-mcp-server --port 8080
|
||||
```
|
||||
|
||||
**Security Note:** Binding to `0.0.0.0` exposes the server to your network. Only use this if you understand the security implications.
|
||||
**Security Note:** Using `--host 0.0.0.0` exposes the server to your network. Only use this if you understand the security implications.
|
||||
|
||||
### Transport Protocols
|
||||
|
||||
The server supports multiple MCP transport protocols:
|
||||
|
||||
```bash
|
||||
# Streamable HTTP (default, recommended)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--transport streamable-http
|
||||
# Streamable HTTP (recommended)
|
||||
uv run nextcloud-mcp-server --transport streamable-http
|
||||
|
||||
# SSE - Server-Sent Events (deprecated)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--transport sse
|
||||
# SSE - Server-Sent Events (default, deprecated)
|
||||
uv run nextcloud-mcp-server --transport sse
|
||||
|
||||
# HTTP
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--transport http
|
||||
uv run nextcloud-mcp-server --transport http
|
||||
```
|
||||
|
||||
> [!WARNING]
|
||||
@@ -165,14 +201,10 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
|
||||
```bash
|
||||
# Set log level (critical, error, warning, info, debug, trace)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--log-level debug
|
||||
uv run nextcloud-mcp-server --log-level debug
|
||||
|
||||
# Production: use warning or error
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--log-level warning
|
||||
uv run nextcloud-mcp-server --log-level warning
|
||||
```
|
||||
|
||||
### Selective App Enablement
|
||||
@@ -180,26 +212,22 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
By default, all supported Nextcloud apps are enabled. You can enable specific apps only:
|
||||
|
||||
```bash
|
||||
# Available apps: notes, tables, webdav, calendar, contacts, cookbook, deck
|
||||
# Available apps: notes, tables, webdav, calendar, contacts, deck
|
||||
|
||||
# Enable all apps (default)
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth
|
||||
uv run nextcloud-mcp-server
|
||||
|
||||
# Enable only Notes
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app notes
|
||||
uv run nextcloud-mcp-server --enable-app notes
|
||||
|
||||
# Enable multiple apps
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app notes --enable-app calendar --enable-app contacts
|
||||
uv run nextcloud-mcp-server \
|
||||
--enable-app notes \
|
||||
--enable-app calendar \
|
||||
--enable-app contacts
|
||||
|
||||
# Enable only WebDAV for file operations
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--enable-app webdav
|
||||
uv run nextcloud-mcp-server --enable-app webdav
|
||||
```
|
||||
|
||||
**Use cases:**
|
||||
@@ -212,68 +240,24 @@ docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
|
||||
## Development Mode
|
||||
|
||||
### Running for Development
|
||||
|
||||
For active development with auto-reload, mount your source code as a volume:
|
||||
For active development with auto-reload:
|
||||
|
||||
```bash
|
||||
# Development mode with source code mounted
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
-v $(pwd):/app \
|
||||
-v $(pwd)/data:/app/data \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
# Using uvicorn with reload
|
||||
uv run uvicorn nextcloud_mcp_server.app:get_app \
|
||||
--factory \
|
||||
--reload \
|
||||
--host 127.0.0.1 \
|
||||
--port 8000 \
|
||||
--log-level debug
|
||||
```
|
||||
|
||||
For local development without Docker:
|
||||
Or use the CLI with reload flag:
|
||||
|
||||
```bash
|
||||
# Load environment variables
|
||||
export $(grep -v '^#' .env | xargs)
|
||||
|
||||
# Run the server with auto-reload
|
||||
uv run nextcloud-mcp-server run --oauth --log-level debug
|
||||
uv run nextcloud-mcp-server --reload --log-level debug
|
||||
```
|
||||
|
||||
### CLI Subcommands
|
||||
|
||||
The `nextcloud-mcp-server` CLI has two main subcommands:
|
||||
|
||||
1. **`run`** - Start the MCP server (default command in Docker)
|
||||
```bash
|
||||
uv run nextcloud-mcp-server run --oauth --host 0.0.0.0 --port 8000
|
||||
```
|
||||
|
||||
2. **`db`** - Database migration management (Alembic)
|
||||
```bash
|
||||
# Show current migration revision
|
||||
uv run nextcloud-mcp-server db current
|
||||
|
||||
# Upgrade to latest migration
|
||||
uv run nextcloud-mcp-server db upgrade
|
||||
|
||||
# Show migration history
|
||||
uv run nextcloud-mcp-server db history
|
||||
|
||||
# Create new migration (developers only)
|
||||
uv run nextcloud-mcp-server db migrate "description of changes"
|
||||
```
|
||||
|
||||
### Database Migrations
|
||||
|
||||
Token storage uses **Alembic** for schema management:
|
||||
|
||||
- **Automatic migrations**: Database is upgraded automatically on server startup
|
||||
- **Backward compatibility**: Pre-Alembic databases are automatically stamped with the initial revision
|
||||
- **Migration files**: Located in `alembic/versions/`
|
||||
- **For developers**: When changing the schema:
|
||||
1. Create a migration: `uv run nextcloud-mcp-server db migrate "add new column"`
|
||||
2. Edit the generated file in `alembic/versions/` to add SQL statements
|
||||
3. Test upgrade: `uv run nextcloud-mcp-server db upgrade`
|
||||
4. Test downgrade: `uv run nextcloud-mcp-server db downgrade`
|
||||
|
||||
See [Database Migrations Guide](database-migrations.md) for detailed information.
|
||||
|
||||
---
|
||||
|
||||
## Connecting to the Server
|
||||
@@ -282,15 +266,15 @@ See [Database Migrations Guide](database-migrations.md) for detailed information
|
||||
|
||||
MCP Inspector is a browser-based tool for testing MCP servers:
|
||||
|
||||
1. Start your MCP server using Docker (see above)
|
||||
2. Start MCP Inspector:
|
||||
```bash
|
||||
npx @modelcontextprotocol/inspector
|
||||
```
|
||||
3. In the browser:
|
||||
- Enter server URL: `http://localhost:8000`
|
||||
- Complete OAuth flow (if using OAuth)
|
||||
- Explore tools and resources
|
||||
```bash
|
||||
# Start MCP Inspector
|
||||
uv run mcp dev
|
||||
|
||||
# In the browser:
|
||||
# 1. Enter server URL: http://localhost:8000
|
||||
# 2. Complete OAuth flow (if using OAuth)
|
||||
# 3. Explore tools and resources
|
||||
```
|
||||
|
||||
### Using MCP Clients
|
||||
|
||||
@@ -338,13 +322,48 @@ INFO Initializing Nextcloud client with BasicAuth
|
||||
|
||||
### Running as a Background Service
|
||||
|
||||
Use Docker Compose with `restart: unless-stopped` (see [Docker Compose section](#docker-compose) above).
|
||||
#### Using systemd (Linux)
|
||||
|
||||
Create `/etc/systemd/system/nextcloud-mcp.service`:
|
||||
|
||||
```ini
|
||||
[Unit]
|
||||
Description=Nextcloud MCP Server
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=your-user
|
||||
WorkingDirectory=/path/to/nextcloud-mcp-server
|
||||
EnvironmentFile=/path/to/.env
|
||||
ExecStart=/path/to/uv run nextcloud-mcp-server --oauth
|
||||
Restart=on-failure
|
||||
RestartSec=10
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Enable and start:
|
||||
|
||||
```bash
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable nextcloud-mcp
|
||||
sudo systemctl start nextcloud-mcp
|
||||
sudo systemctl status nextcloud-mcp
|
||||
```
|
||||
|
||||
#### Using Docker Compose
|
||||
|
||||
See [Docker Compose section](#docker-compose) above - includes `restart: unless-stopped`.
|
||||
|
||||
### Monitoring Logs
|
||||
|
||||
```bash
|
||||
# Docker (find container name first)
|
||||
docker ps
|
||||
# Local installation with systemd
|
||||
sudo journalctl -u nextcloud-mcp -f
|
||||
|
||||
# Docker
|
||||
docker logs -f <container-name>
|
||||
|
||||
# Docker Compose
|
||||
@@ -355,37 +374,34 @@ docker-compose logs -f mcp
|
||||
|
||||
## Performance Tuning
|
||||
|
||||
### Production Settings
|
||||
### Multiple Workers
|
||||
|
||||
For production deployments, use Docker Compose with the recommended settings:
|
||||
For production deployments with higher load:
|
||||
|
||||
```yaml
|
||||
version: '3.8'
|
||||
```bash
|
||||
# Using CLI (if supported)
|
||||
uv run nextcloud-mcp-server --workers 4
|
||||
|
||||
services:
|
||||
mcp:
|
||||
image: ghcr.io/cbcoutinho/nextcloud-mcp-server:latest
|
||||
command: --oauth --log-level warning --transport streamable-http
|
||||
ports:
|
||||
- "127.0.0.1:8000:8000"
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./data:/app/data
|
||||
restart: unless-stopped
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
cpus: '2'
|
||||
memory: 1G
|
||||
reservations:
|
||||
cpus: '0.5'
|
||||
memory: 512M
|
||||
# Using uvicorn
|
||||
uv run uvicorn nextcloud_mcp_server.app:get_app \
|
||||
--factory \
|
||||
--workers 4 \
|
||||
--host 0.0.0.0 \
|
||||
--port 8000
|
||||
```
|
||||
|
||||
### Scaling with Multiple Replicas
|
||||
### Production Settings
|
||||
|
||||
For higher load, use Docker Swarm or Kubernetes. See the [Helm Chart](../helm/) for Kubernetes deployments.
|
||||
```bash
|
||||
# Recommended production configuration
|
||||
uv run nextcloud-mcp-server \
|
||||
--oauth \
|
||||
--host 127.0.0.1 \
|
||||
--port 8000 \
|
||||
--log-level warning \
|
||||
--transport streamable-http \
|
||||
--workers 2
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -395,18 +411,12 @@ For higher load, use Docker Swarm or Kubernetes. See the [Helm Chart](../helm/)
|
||||
|
||||
Check logs for errors:
|
||||
```bash
|
||||
# View container logs
|
||||
docker logs <container-name>
|
||||
|
||||
# Or run with debug logging
|
||||
docker run -p 127.0.0.1:8000:8000 --env-file .env --rm \
|
||||
ghcr.io/cbcoutinho/nextcloud-mcp-server:latest --oauth \
|
||||
--log-level debug
|
||||
uv run nextcloud-mcp-server --log-level debug
|
||||
```
|
||||
|
||||
Common issues:
|
||||
- Environment variables not loaded - Check your `.env` file
|
||||
- Port already in use - Use a different host port (e.g., `-p 127.0.0.1:8080:8000`)
|
||||
- Environment variables not loaded - See [Configuration](configuration.md#loading-environment-variables)
|
||||
- Port already in use - Try a different port with `--port`
|
||||
- OAuth configuration errors - See [Troubleshooting](troubleshooting.md)
|
||||
|
||||
### Can't connect to server
|
||||
|
||||
@@ -5,7 +5,7 @@ This document explains the architecture of the semantic search feature in the Ne
|
||||
> [!IMPORTANT]
|
||||
> **Status: Experimental**
|
||||
> - Disabled by default (`VECTOR_SYNC_ENABLED=false`)
|
||||
> - Currently supports **Notes, Files (PDFs), News items, and Deck cards**
|
||||
> - Currently supports **Notes app only** (multi-app architecture ready, additional apps planned)
|
||||
> - Requires additional infrastructure (Qdrant vector database + Ollama embedding service)
|
||||
> - RAG answer generation requires MCP client sampling support
|
||||
|
||||
@@ -39,9 +39,9 @@ Semantic search enables:
|
||||
|
||||
### Current Support
|
||||
|
||||
- **Supported Apps**: Notes, Files (PDFs with text extraction), News items, Deck cards
|
||||
- **Planned Apps**: Calendar events, Calendar tasks, Contacts
|
||||
- **Architecture**: Multi-app plugin system ready for additional apps
|
||||
- **Supported Apps**: Notes (fully implemented)
|
||||
- **Planned Apps**: Calendar events, Calendar tasks, Deck cards, Files (with text extraction), Contacts
|
||||
- **Architecture**: Multi-app plugin system ready, awaiting implementation
|
||||
|
||||
## System Components
|
||||
|
||||
|
||||
@@ -4,146 +4,6 @@ This guide covers common issues and solutions for the Nextcloud MCP server.
|
||||
|
||||
> **OAuth-specific issues?** See the dedicated [OAuth Troubleshooting Guide](oauth-troubleshooting.md) for OAuth authentication problems, OIDC discovery issues, token validation failures, and more.
|
||||
|
||||
> **Upgrading from v0.57.x?** See the [Configuration Migration Guide](configuration-migration-v2.md) for help with new variable names.
|
||||
|
||||
## Configuration Issues (v0.58.0+)
|
||||
|
||||
### Issue: Deprecation warning for VECTOR_SYNC_ENABLED
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
WARNING: VECTOR_SYNC_ENABLED is deprecated. Please use ENABLE_SEMANTIC_SEARCH instead.
|
||||
```
|
||||
|
||||
**Cause:** You're using the old variable name from v0.57.x.
|
||||
|
||||
**Solution:**
|
||||
```bash
|
||||
# In your .env file, replace:
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
|
||||
# With:
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
```
|
||||
|
||||
See [Configuration Migration Guide](configuration-migration-v2.md) for complete migration instructions.
|
||||
|
||||
---
|
||||
|
||||
### Issue: Deprecation warning for ENABLE_OFFLINE_ACCESS
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
WARNING: ENABLE_OFFLINE_ACCESS is deprecated. Please use ENABLE_BACKGROUND_OPERATIONS instead.
|
||||
```
|
||||
|
||||
**Cause:** You're using the old variable name from v0.57.x.
|
||||
|
||||
**Solution:**
|
||||
|
||||
**If you have semantic search enabled:**
|
||||
```bash
|
||||
# In multi-user modes, you can remove ENABLE_OFFLINE_ACCESS entirely!
|
||||
# ENABLE_SEMANTIC_SEARCH automatically enables background operations
|
||||
|
||||
# Before (v0.57.x):
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
|
||||
# After (v0.58.0+):
|
||||
ENABLE_SEMANTIC_SEARCH=true # This is all you need!
|
||||
```
|
||||
|
||||
**If you only want background operations (no semantic search):**
|
||||
```bash
|
||||
# Replace:
|
||||
ENABLE_OFFLINE_ACCESS=true
|
||||
|
||||
# With:
|
||||
ENABLE_BACKGROUND_OPERATIONS=true
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Issue: "Invalid MCP_DEPLOYMENT_MODE"
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
ValueError: Invalid MCP_DEPLOYMENT_MODE: 'oauth'. Valid values: single_user_basic, multi_user_basic, oauth_single_audience, oauth_token_exchange, smithery
|
||||
```
|
||||
|
||||
**Cause:** Invalid value for `MCP_DEPLOYMENT_MODE`.
|
||||
|
||||
**Solution:**
|
||||
Use one of the valid mode values:
|
||||
```bash
|
||||
# Correct values:
|
||||
MCP_DEPLOYMENT_MODE=single_user_basic # Single-user with username/password
|
||||
MCP_DEPLOYMENT_MODE=multi_user_basic # Multi-user BasicAuth
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience # OAuth (recommended)
|
||||
MCP_DEPLOYMENT_MODE=oauth_token_exchange # OAuth with token exchange
|
||||
MCP_DEPLOYMENT_MODE=smithery # Smithery deployment
|
||||
```
|
||||
|
||||
Or remove `MCP_DEPLOYMENT_MODE` to use automatic detection.
|
||||
|
||||
---
|
||||
|
||||
### Issue: Missing TOKEN_ENCRYPTION_KEY when semantic search enabled
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: [oauth_single_audience] TOKEN_ENCRYPTION_KEY is required when ENABLE_SEMANTIC_SEARCH is enabled
|
||||
```
|
||||
|
||||
**Cause:** In multi-user modes, semantic search automatically enables background operations, which require encrypted token storage.
|
||||
|
||||
**Solution:**
|
||||
Generate an encryption key and add required token storage configuration:
|
||||
|
||||
```bash
|
||||
# Generate encryption key
|
||||
python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
|
||||
# Add to .env:
|
||||
TOKEN_ENCRYPTION_KEY=<generated-key>
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=your-client-id # Required for app password retrieval
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
|
||||
```
|
||||
|
||||
**Why this happens:**
|
||||
- v0.58.0+ automatically enables background operations when `ENABLE_SEMANTIC_SEARCH=true` in multi-user modes
|
||||
- Background operations need encrypted refresh token storage
|
||||
- This simplifies configuration but requires the encryption infrastructure
|
||||
|
||||
See [Configuration Guide - Semantic Search](configuration.md#semantic-search-configuration-optional) for details.
|
||||
|
||||
---
|
||||
|
||||
### Issue: Both old and new variable names set
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
WARNING: Both ENABLE_SEMANTIC_SEARCH and VECTOR_SYNC_ENABLED are set. Using ENABLE_SEMANTIC_SEARCH.
|
||||
```
|
||||
|
||||
**Cause:** You have both the old and new variable names in your configuration.
|
||||
|
||||
**Solution:**
|
||||
Remove the old variable name:
|
||||
```bash
|
||||
# Remove this line:
|
||||
VECTOR_SYNC_ENABLED=true
|
||||
|
||||
# Keep this line:
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
```
|
||||
|
||||
The server will use the new name and ignore the old one, but it's cleaner to remove the old variable entirely.
|
||||
|
||||
---
|
||||
|
||||
## OAuth Issues (Quick Reference)
|
||||
|
||||
### Issue: "OAuth mode requires NEXTCLOUD_HOST environment variable"
|
||||
|
||||
+175
-213
@@ -1,236 +1,198 @@
|
||||
# ============================================
|
||||
# DEPLOYMENT MODE SELECTION
|
||||
# ============================================
|
||||
# Optional: Explicitly declare deployment mode (ADR-021)
|
||||
# If not set, mode is auto-detected from other settings
|
||||
# Valid values: single_user_basic, multi_user_basic, oauth_single_audience,
|
||||
# oauth_token_exchange, smithery
|
||||
#
|
||||
# Recommendation: Set this for clarity and to catch configuration errors early
|
||||
#MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# ============================================
|
||||
# COMMON SETTINGS (Required for all modes)
|
||||
# ============================================
|
||||
# Your Nextcloud instance URL (without trailing slash)
|
||||
# Nextcloud Instance
|
||||
NEXTCLOUD_HOST=
|
||||
|
||||
# ============================================
|
||||
# SINGLE-USER BASICAUTH MODE
|
||||
# ============================================
|
||||
# Simplest deployment - one user, credentials in environment
|
||||
# Use for: Personal instances, local development, testing
|
||||
#
|
||||
# Required:
|
||||
# ===== AUTHENTICATION MODE =====
|
||||
# Choose ONE of the following:
|
||||
|
||||
# Option 1: OAuth2/OIDC (RECOMMENDED - More Secure)
|
||||
# - Requires Nextcloud OIDC app installed and configured
|
||||
# - Admin must enable "Dynamic Client Registration" in OIDC app settings
|
||||
# - Leave NEXTCLOUD_USERNAME and NEXTCLOUD_PASSWORD empty to use OAuth mode
|
||||
# - OAuth client credentials are stored encrypted in SQLite (TOKEN_STORAGE_DB)
|
||||
# - Optional: Pre-register client and provide credentials (otherwise auto-registers)
|
||||
NEXTCLOUD_OIDC_CLIENT_ID=
|
||||
NEXTCLOUD_OIDC_CLIENT_SECRET=
|
||||
NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
|
||||
|
||||
# OAuth Storage Configuration (SQLite storage for OAuth clients and refresh tokens)
|
||||
# TOKEN_ENCRYPTION_KEY: Required for encrypting OAuth client secrets and refresh tokens
|
||||
# Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
#TOKEN_ENCRYPTION_KEY=
|
||||
# TOKEN_STORAGE_DB: Path to SQLite database (default: /app/data/tokens.db)
|
||||
#TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
# ===== ADR-004 PROGRESSIVE CONSENT CONFIGURATION =====
|
||||
# Enable Progressive Consent mode (dual OAuth flows)
|
||||
# When enabled: Flow 1 for client auth, Flow 2 for Nextcloud resource access
|
||||
# When disabled: Uses existing hybrid flow (backward compatible)
|
||||
|
||||
# MCP Server OAuth Client Configuration
|
||||
# The MCP server's own OAuth client credentials for Flow 2
|
||||
# If not set, will use dynamic client registration
|
||||
#MCP_SERVER_CLIENT_ID=
|
||||
#MCP_SERVER_CLIENT_SECRET=
|
||||
|
||||
# Allowed MCP Client IDs (comma-separated list)
|
||||
# Client IDs that are allowed to authenticate in Flow 1
|
||||
# Examples: claude-desktop,continue-dev,zed-editor
|
||||
#ALLOWED_MCP_CLIENTS=claude-desktop,continue-dev,zed-editor
|
||||
|
||||
# Token cache configuration for Token Broker Service
|
||||
# Cache TTL in seconds (default: 300 = 5 minutes)
|
||||
#TOKEN_CACHE_TTL=300
|
||||
# Early refresh threshold in seconds (default: 30)
|
||||
#TOKEN_CACHE_EARLY_REFRESH=30
|
||||
|
||||
# Option 2: Basic Authentication (LEGACY - Less Secure)
|
||||
# - Requires username and password
|
||||
# - Credentials stored in environment variables
|
||||
# - Use only for backward compatibility or if OAuth unavailable
|
||||
# - If these are set, OAuth mode is disabled
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
#
|
||||
# Optional features (semantic search, document processing):
|
||||
# See "Optional Features" section below
|
||||
|
||||
# ============================================
|
||||
# MULTI-USER BASICAUTH MODE
|
||||
# Document Processing Configuration
|
||||
# ============================================
|
||||
# Users provide credentials in request headers (pass-through)
|
||||
# Use for: Multi-user without OAuth, simple shared deployments
|
||||
#
|
||||
# Required:
|
||||
#ENABLE_MULTI_USER_BASIC_AUTH=true
|
||||
#
|
||||
# Optional - Background Operations (for semantic search, future features):
|
||||
# Enable background token storage using app passwords (via Astrolabe)
|
||||
# Required for semantic search in multi-user mode
|
||||
# Note: ENABLE_SEMANTIC_SEARCH automatically enables this in multi-user modes
|
||||
#ENABLE_BACKGROUND_OPERATIONS=true
|
||||
#NEXTCLOUD_OIDC_CLIENT_ID=
|
||||
#NEXTCLOUD_OIDC_CLIENT_SECRET=
|
||||
#TOKEN_ENCRYPTION_KEY=
|
||||
#TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
#
|
||||
# Optional features (semantic search, document processing):
|
||||
# See "Optional Features" section below
|
||||
# Enable document processing (PDF, DOCX, images, etc.)
|
||||
# Set to false to disable all document processing
|
||||
ENABLE_DOCUMENT_PROCESSING=false
|
||||
|
||||
# Default processor to use when multiple are available
|
||||
# Options: unstructured, tesseract, custom
|
||||
DOCUMENT_PROCESSOR=unstructured
|
||||
|
||||
# ============================================
|
||||
# OAUTH SINGLE-AUDIENCE MODE (Recommended)
|
||||
# Unstructured.io Processor
|
||||
# ============================================
|
||||
# Multi-user OAuth with single-audience tokens
|
||||
# Use for: Multi-user production deployments, enhanced security
|
||||
# Tokens work for both MCP server and Nextcloud APIs (pass-through)
|
||||
#
|
||||
# Required: None (uses Dynamic Client Registration if credentials not provided)
|
||||
#
|
||||
# Optional - Pre-registered OAuth Client:
|
||||
# If you pre-register the client instead of using DCR:
|
||||
#NEXTCLOUD_OIDC_CLIENT_ID=
|
||||
#NEXTCLOUD_OIDC_CLIENT_SECRET=
|
||||
#
|
||||
# Optional - Background Operations (for semantic search, future features):
|
||||
# Enable refresh token storage for offline access
|
||||
# Note: ENABLE_SEMANTIC_SEARCH automatically enables this in multi-user modes
|
||||
#ENABLE_BACKGROUND_OPERATIONS=true
|
||||
#TOKEN_ENCRYPTION_KEY=
|
||||
#TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
#
|
||||
# Optional - Custom OIDC Discovery:
|
||||
# Auto-detected from NEXTCLOUD_HOST if not set
|
||||
#NEXTCLOUD_OIDC_DISCOVERY_URL=
|
||||
#
|
||||
# Optional - Custom Scopes:
|
||||
# Default: openid profile email offline_access notes:* calendar:* contacts:* tables:* webdav:* deck:* cookbook:*
|
||||
#NEXTCLOUD_OIDC_SCOPES=openid profile email notes:* calendar:*
|
||||
#
|
||||
# MCP Server URL (for OAuth redirects):
|
||||
#NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
|
||||
#
|
||||
# Optional features (semantic search, document processing):
|
||||
# See "Optional Features" section below
|
||||
# Enable Unstructured processor (requires unstructured service in docker-compose)
|
||||
# This is a cloud-based/API processor supporting many document types
|
||||
ENABLE_UNSTRUCTURED=false
|
||||
|
||||
# Unstructured API endpoint
|
||||
UNSTRUCTURED_API_URL=http://unstructured:8000
|
||||
|
||||
# Request timeout in seconds (default: 120)
|
||||
# OCR operations can take 30-120 seconds for large documents
|
||||
UNSTRUCTURED_TIMEOUT=120
|
||||
|
||||
# Parsing strategy: auto, fast, hi_res
|
||||
# - auto: Automatically choose based on document type
|
||||
# - fast: Fast parsing without OCR
|
||||
# - hi_res: High-resolution with OCR (slowest, most accurate)
|
||||
UNSTRUCTURED_STRATEGY=auto
|
||||
|
||||
# OCR languages (comma-separated ISO 639-3 codes)
|
||||
# Common: eng=English, deu=German, fra=French, spa=Spanish
|
||||
UNSTRUCTURED_LANGUAGES=eng,deu
|
||||
|
||||
# Progress reporting interval in seconds (default: 10)
|
||||
# During long-running OCR operations, progress notifications are sent to the MCP client
|
||||
# at this interval to prevent timeouts and provide status updates
|
||||
PROGRESS_INTERVAL=10
|
||||
|
||||
# ============================================
|
||||
# OAUTH TOKEN EXCHANGE MODE (Advanced)
|
||||
# Tesseract Processor (Local OCR)
|
||||
# ============================================
|
||||
# Multi-user OAuth with RFC 8693 token exchange
|
||||
# Use for: Advanced deployments requiring separate MCP and Nextcloud tokens
|
||||
# MCP tokens are separate from Nextcloud tokens
|
||||
#
|
||||
# Required:
|
||||
#ENABLE_TOKEN_EXCHANGE=true
|
||||
#
|
||||
# Optional - Pre-registered OAuth Client:
|
||||
# If you pre-register the client instead of using DCR:
|
||||
#NEXTCLOUD_OIDC_CLIENT_ID=
|
||||
#NEXTCLOUD_OIDC_CLIENT_SECRET=
|
||||
#
|
||||
# Optional - Token Exchange Configuration:
|
||||
# Cache TTL in seconds (default: 300 = 5 minutes)
|
||||
#TOKEN_EXCHANGE_CACHE_TTL=300
|
||||
#
|
||||
# Optional - Background Operations:
|
||||
# Note: ENABLE_SEMANTIC_SEARCH automatically enables this in multi-user modes
|
||||
#ENABLE_BACKGROUND_OPERATIONS=true
|
||||
#TOKEN_ENCRYPTION_KEY=
|
||||
#TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
#
|
||||
# Optional - Custom OIDC Discovery:
|
||||
#NEXTCLOUD_OIDC_DISCOVERY_URL=
|
||||
#
|
||||
# MCP Server URL (for OAuth redirects):
|
||||
#NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
|
||||
#
|
||||
# Optional features (semantic search, document processing):
|
||||
# See "Optional Features" section below
|
||||
# Enable Tesseract processor (requires tesseract binary installed)
|
||||
# This is a local, lightweight OCR solution for images only
|
||||
ENABLE_TESSERACT=false
|
||||
|
||||
# ============================================
|
||||
# SMITHERY STATELESS MODE
|
||||
# ============================================
|
||||
# Stateless multi-tenant deployment for Smithery platform
|
||||
# Configuration comes from session URL parameters
|
||||
# No persistent storage, no OAuth, no vector sync
|
||||
#
|
||||
# Required: None (all config from session URL)
|
||||
# This mode is activated automatically when deployed to Smithery
|
||||
|
||||
# ============================================
|
||||
# OPTIONAL FEATURES (All Deployment Modes)
|
||||
# ============================================
|
||||
|
||||
# ===== SEMANTIC SEARCH =====
|
||||
# AI-powered semantic search across Nextcloud content
|
||||
# Requires: Qdrant vector database + embedding provider (Ollama, Bedrock, or Simple fallback)
|
||||
#
|
||||
# Enable semantic search:
|
||||
#ENABLE_SEMANTIC_SEARCH=true
|
||||
#
|
||||
# Note for Multi-User Modes:
|
||||
# ENABLE_SEMANTIC_SEARCH automatically enables background operations when needed
|
||||
# No need to set ENABLE_BACKGROUND_OPERATIONS separately
|
||||
# The server will automatically request refresh tokens and store them encrypted
|
||||
#
|
||||
# Vector Database - Choose ONE mode:
|
||||
# 1. In-memory (default): Set neither QDRANT_URL nor QDRANT_LOCATION
|
||||
# 2. Persistent local: Set QDRANT_LOCATION=/path/to/data
|
||||
# 3. Network: Set QDRANT_URL=http://qdrant:6333
|
||||
#
|
||||
#QDRANT_URL=http://qdrant:6333
|
||||
#QDRANT_LOCATION=:memory:
|
||||
#QDRANT_API_KEY=
|
||||
#QDRANT_COLLECTION=nextcloud_content
|
||||
#
|
||||
# Embedding Provider - Choose ONE:
|
||||
# 1. Ollama (recommended for local deployment):
|
||||
#OLLAMA_BASE_URL=http://ollama:11434
|
||||
#OLLAMA_EMBEDDING_MODEL=nomic-embed-text
|
||||
#OLLAMA_VERIFY_SSL=true
|
||||
#
|
||||
# 2. Amazon Bedrock (for AWS deployments):
|
||||
#AWS_REGION=us-east-1
|
||||
#BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
|
||||
# Optional: AWS credentials (uses credential chain if not set)
|
||||
#AWS_ACCESS_KEY_ID=
|
||||
#AWS_SECRET_ACCESS_KEY=
|
||||
#
|
||||
# 3. Simple (automatic fallback, no configuration needed)
|
||||
# Uses basic in-memory embeddings if no provider configured
|
||||
#
|
||||
# Document Chunking:
|
||||
# Configure how documents are split before embedding
|
||||
#DOCUMENT_CHUNK_SIZE=512
|
||||
#DOCUMENT_CHUNK_OVERLAP=50
|
||||
|
||||
# ===== SEMANTIC SEARCH TUNING =====
|
||||
# Advanced parameters for vector sync background operations
|
||||
# Only modify if you understand the implications
|
||||
#
|
||||
# Document scan interval in seconds (default: 300 = 5 minutes)
|
||||
#VECTOR_SYNC_SCAN_INTERVAL=300
|
||||
#
|
||||
# Concurrent indexing workers (default: 3)
|
||||
#VECTOR_SYNC_PROCESSOR_WORKERS=3
|
||||
#
|
||||
# Max queued documents (default: 10000)
|
||||
#VECTOR_SYNC_QUEUE_MAX_SIZE=10000
|
||||
|
||||
# ===== DOCUMENT PROCESSING =====
|
||||
# Extract text from PDFs, images, DOCX, etc. for semantic search
|
||||
# Disabled by default
|
||||
#
|
||||
#ENABLE_DOCUMENT_PROCESSING=false
|
||||
#DOCUMENT_PROCESSOR=unstructured
|
||||
#
|
||||
# Unstructured.io Processor (recommended):
|
||||
#ENABLE_UNSTRUCTURED=false
|
||||
#UNSTRUCTURED_API_URL=http://unstructured:8000
|
||||
#UNSTRUCTURED_TIMEOUT=120
|
||||
#UNSTRUCTURED_STRATEGY=auto
|
||||
#UNSTRUCTURED_LANGUAGES=eng,deu
|
||||
#PROGRESS_INTERVAL=10
|
||||
#
|
||||
# Tesseract OCR (lightweight, images only):
|
||||
#ENABLE_TESSERACT=false
|
||||
# Path to tesseract executable (optional, auto-detected if in PATH)
|
||||
#TESSERACT_CMD=/usr/bin/tesseract
|
||||
#TESSERACT_LANG=eng
|
||||
#
|
||||
# Custom Processor (your own API):
|
||||
#ENABLE_CUSTOM_PROCESSOR=false
|
||||
|
||||
# OCR language (e.g., eng, deu, eng+deu for multiple)
|
||||
TESSERACT_LANG=eng
|
||||
|
||||
# ============================================
|
||||
# Custom Processor (Your own API)
|
||||
# ============================================
|
||||
# Enable custom document processor via HTTP API
|
||||
ENABLE_CUSTOM_PROCESSOR=false
|
||||
|
||||
# Unique name for your processor
|
||||
#CUSTOM_PROCESSOR_NAME=my_ocr
|
||||
|
||||
# Your custom processor API endpoint
|
||||
#CUSTOM_PROCESSOR_URL=http://localhost:9000/process
|
||||
#CUSTOM_PROCESSOR_API_KEY=
|
||||
|
||||
# Optional API key for authentication
|
||||
#CUSTOM_PROCESSOR_API_KEY=your-api-key-here
|
||||
|
||||
# Request timeout in seconds
|
||||
#CUSTOM_PROCESSOR_TIMEOUT=60
|
||||
|
||||
# Comma-separated MIME types your processor supports
|
||||
#CUSTOM_PROCESSOR_TYPES=application/pdf,image/jpeg,image/png
|
||||
|
||||
# ===== SECURITY & ADVANCED =====
|
||||
# Cookie security (browser UI)
|
||||
# Auto-detects from NEXTCLOUD_HOST protocol if not set
|
||||
#COOKIE_SECURE=true
|
||||
# ============================================
|
||||
# Semantic Search & Vector Sync Configuration
|
||||
# ============================================
|
||||
# EXPERIMENTAL: Semantic search for Notes app (multi-app support planned)
|
||||
# Requires: Qdrant vector database + Ollama embedding service
|
||||
# Disabled by default
|
||||
|
||||
# Enable background vector indexing
|
||||
VECTOR_SYNC_ENABLED=false
|
||||
|
||||
# Document scan interval in seconds (default: 300 = 5 minutes)
|
||||
# How often to check for new/updated documents
|
||||
#VECTOR_SYNC_SCAN_INTERVAL=300
|
||||
|
||||
# Concurrent indexing workers (default: 3)
|
||||
# Number of parallel workers for embedding generation
|
||||
#VECTOR_SYNC_PROCESSOR_WORKERS=3
|
||||
|
||||
# Max queued documents (default: 10000)
|
||||
# Maximum documents waiting to be processed
|
||||
#VECTOR_SYNC_QUEUE_MAX_SIZE=10000
|
||||
|
||||
# ============================================
|
||||
# DEPRECATED VARIABLES (Backward Compatibility)
|
||||
# Qdrant Vector Database Configuration
|
||||
# ============================================
|
||||
# These variables still work but will be removed in v1.0.0
|
||||
# Please migrate to new names:
|
||||
#
|
||||
# Old Name → New Name
|
||||
# VECTOR_SYNC_ENABLED → ENABLE_SEMANTIC_SEARCH
|
||||
# ENABLE_OFFLINE_ACCESS → ENABLE_BACKGROUND_OPERATIONS
|
||||
#
|
||||
# Migration is optional - both old and new names work
|
||||
# Deprecation warnings will be logged when old names are used
|
||||
# Choose ONE of three modes:
|
||||
# 1. In-memory mode (default): Set neither QDRANT_URL nor QDRANT_LOCATION
|
||||
# 2. Persistent local: Set QDRANT_LOCATION=/path/to/data
|
||||
# 3. Network mode: Set QDRANT_URL=http://qdrant:6333
|
||||
|
||||
# Network mode: URL to Qdrant service
|
||||
#QDRANT_URL=http://qdrant:6333
|
||||
|
||||
# Local mode: Path to store vectors (use :memory: for in-memory)
|
||||
#QDRANT_LOCATION=:memory:
|
||||
|
||||
# API key for network mode (optional)
|
||||
#QDRANT_API_KEY=
|
||||
|
||||
# Collection name (optional - auto-generated if not set)
|
||||
# Auto-generation format: {deployment-id}-{model-name}
|
||||
# Allows safe model switching and multi-server deployments
|
||||
#QDRANT_COLLECTION=nextcloud_content
|
||||
|
||||
# ============================================
|
||||
# Ollama Embedding Service Configuration
|
||||
# ============================================
|
||||
# Ollama endpoint for embeddings (if not set, uses SimpleEmbeddingProvider fallback)
|
||||
#OLLAMA_BASE_URL=http://ollama:11434
|
||||
|
||||
# Embedding model to use (default: nomic-embed-text, 768 dimensions)
|
||||
# Changing this creates a new collection (requires re-embedding all documents)
|
||||
#OLLAMA_EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# Verify SSL certificates (default: true)
|
||||
#OLLAMA_VERIFY_SSL=true
|
||||
|
||||
# ============================================
|
||||
# Document Chunking Configuration
|
||||
# ============================================
|
||||
# Configure how documents are split before embedding
|
||||
|
||||
# Words per chunk (default: 512)
|
||||
# Smaller chunks (256-384): More precise, less context, more storage
|
||||
# Larger chunks (768-1024): More context, less precise, less storage
|
||||
#DOCUMENT_CHUNK_SIZE=512
|
||||
|
||||
# Overlapping words between chunks (default: 50)
|
||||
# Recommended: 10-20% of chunk size
|
||||
# Preserves context across chunk boundaries
|
||||
#DOCUMENT_CHUNK_OVERLAP=50
|
||||
|
||||
@@ -1,80 +0,0 @@
|
||||
# ============================================
|
||||
# OAUTH TOKEN EXCHANGE QUICK START (Advanced)
|
||||
# ============================================
|
||||
# Advanced OAuth deployment with RFC 8693 token exchange
|
||||
# Use for: Deployments requiring separate MCP and Nextcloud tokens
|
||||
# Features: Dual-audience tokens, enhanced security boundaries
|
||||
#
|
||||
# Copy this file to .env and configure
|
||||
|
||||
# ===== REQUIRED SETTINGS =====
|
||||
# Your Nextcloud instance URL (without trailing slash)
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
|
||||
# Enable token exchange mode
|
||||
ENABLE_TOKEN_EXCHANGE=true
|
||||
|
||||
# ===== REQUIRED: LEAVE USERNAME/PASSWORD EMPTY =====
|
||||
# OAuth mode activates when these are NOT set
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# ===== OPTIONAL: EXPLICIT MODE DECLARATION =====
|
||||
# Recommended for clarity
|
||||
MCP_DEPLOYMENT_MODE=oauth_token_exchange
|
||||
|
||||
# ===== OPTIONAL: PRE-REGISTERED OAUTH CLIENT =====
|
||||
# If you pre-register the OAuth client instead of using DCR:
|
||||
#NEXTCLOUD_OIDC_CLIENT_ID=your-client-id
|
||||
#NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
|
||||
|
||||
# MCP Server URL (for OAuth redirects)
|
||||
NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
|
||||
|
||||
# ===== OPTIONAL: TOKEN EXCHANGE TUNING =====
|
||||
# Cache TTL for exchanged tokens (default: 300 seconds = 5 minutes)
|
||||
TOKEN_EXCHANGE_CACHE_TTL=300
|
||||
|
||||
# ===== OPTIONAL: SEMANTIC SEARCH =====
|
||||
# AI-powered semantic search with automatic background operation setup
|
||||
#
|
||||
# Note: ENABLE_SEMANTIC_SEARCH automatically enables background operations
|
||||
# in token exchange mode, just like in OAuth single-audience mode
|
||||
#
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
|
||||
# Vector Database (required for semantic search)
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
|
||||
# Embedding Provider (required for semantic search)
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
OLLAMA_EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# Token Storage (required for background operations - auto-enabled by semantic search)
|
||||
# Generate encryption key: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
TOKEN_ENCRYPTION_KEY=your-encryption-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
# ===== OPTIONAL: DOCUMENT PROCESSING =====
|
||||
# Extract text from PDFs, images, DOCX for semantic search
|
||||
#ENABLE_DOCUMENT_PROCESSING=true
|
||||
#ENABLE_UNSTRUCTURED=true
|
||||
#UNSTRUCTURED_API_URL=http://unstructured:8000
|
||||
|
||||
# ===== TOKEN EXCHANGE MODE EXPLANATION =====
|
||||
# In this mode:
|
||||
# 1. MCP clients authenticate with tokens scoped to "mcp-server" audience
|
||||
# 2. Server exchanges MCP tokens for Nextcloud tokens on each request
|
||||
# 3. Provides clear separation between MCP session and Nextcloud access
|
||||
# 4. Enables fine-grained token lifecycle management
|
||||
#
|
||||
# When to use:
|
||||
# - Strict security requirements (separate token contexts)
|
||||
# - Complex multi-service architectures
|
||||
# - Need independent token expiration policies
|
||||
#
|
||||
# When NOT to use:
|
||||
# - Simple deployments (use oauth_single_audience instead)
|
||||
# - High-performance requirements (token exchange adds latency)
|
||||
|
||||
# For more configuration options, see env.sample
|
||||
@@ -1,77 +0,0 @@
|
||||
# ============================================
|
||||
# OAUTH MULTI-USER QUICK START (Recommended)
|
||||
# ============================================
|
||||
# Multi-user deployment with OAuth authentication
|
||||
# Use for: Multi-user production deployments, enhanced security
|
||||
# Features: Single-audience tokens, automatic client registration (DCR)
|
||||
#
|
||||
# Copy this file to .env and configure
|
||||
|
||||
# ===== REQUIRED SETTINGS =====
|
||||
# Your Nextcloud instance URL (without trailing slash)
|
||||
NEXTCLOUD_HOST=https://nextcloud.example.com
|
||||
|
||||
# ===== REQUIRED: LEAVE USERNAME/PASSWORD EMPTY =====
|
||||
# OAuth mode activates when these are NOT set
|
||||
NEXTCLOUD_USERNAME=
|
||||
NEXTCLOUD_PASSWORD=
|
||||
|
||||
# ===== OPTIONAL: EXPLICIT MODE DECLARATION =====
|
||||
# Recommended for clarity
|
||||
MCP_DEPLOYMENT_MODE=oauth_single_audience
|
||||
|
||||
# ===== OPTIONAL: PRE-REGISTERED OAUTH CLIENT =====
|
||||
# If you pre-register the OAuth client instead of using DCR:
|
||||
#NEXTCLOUD_OIDC_CLIENT_ID=your-client-id
|
||||
#NEXTCLOUD_OIDC_CLIENT_SECRET=your-client-secret
|
||||
|
||||
# MCP Server URL (for OAuth redirects)
|
||||
NEXTCLOUD_MCP_SERVER_URL=http://localhost:8000
|
||||
|
||||
# ===== OPTIONAL: SEMANTIC SEARCH (Recommended) =====
|
||||
# AI-powered semantic search with automatic background operation setup
|
||||
#
|
||||
# When you enable semantic search in multi-user mode:
|
||||
# 1. ENABLE_SEMANTIC_SEARCH automatically enables background operations
|
||||
# 2. Server requests refresh tokens for offline indexing
|
||||
# 3. Tokens are stored encrypted in TOKEN_STORAGE_DB
|
||||
# 4. No need to set ENABLE_BACKGROUND_OPERATIONS separately!
|
||||
#
|
||||
ENABLE_SEMANTIC_SEARCH=true
|
||||
|
||||
# Vector Database (required for semantic search)
|
||||
QDRANT_URL=http://qdrant:6333
|
||||
# OR for in-memory mode:
|
||||
#QDRANT_LOCATION=:memory:
|
||||
|
||||
# Embedding Provider (required for semantic search)
|
||||
# Option 1: Ollama (recommended for local deployment)
|
||||
OLLAMA_BASE_URL=http://ollama:11434
|
||||
OLLAMA_EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# Option 2: Amazon Bedrock (for AWS deployments)
|
||||
#AWS_REGION=us-east-1
|
||||
#BEDROCK_EMBEDDING_MODEL=amazon.titan-embed-text-v2:0
|
||||
|
||||
# Token Storage (required for background operations - auto-enabled by semantic search)
|
||||
# Generate encryption key: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
TOKEN_ENCRYPTION_KEY=your-encryption-key-here
|
||||
TOKEN_STORAGE_DB=/app/data/tokens.db
|
||||
|
||||
# ===== OPTIONAL: DOCUMENT PROCESSING =====
|
||||
# Extract text from PDFs, images, DOCX for semantic search
|
||||
#ENABLE_DOCUMENT_PROCESSING=true
|
||||
#ENABLE_UNSTRUCTURED=true
|
||||
#UNSTRUCTURED_API_URL=http://unstructured:8000
|
||||
|
||||
# ===== SUMMARY OF AUTO-ENABLEMENT =====
|
||||
# With ENABLE_SEMANTIC_SEARCH=true in OAuth mode:
|
||||
# ✅ Background operations enabled automatically
|
||||
# ✅ Refresh token storage enabled automatically
|
||||
# ✅ OAuth credentials required (DCR or pre-registered)
|
||||
# ✅ Encryption key required for token storage
|
||||
#
|
||||
# You only need to set ENABLE_SEMANTIC_SEARCH and provide the required
|
||||
# infrastructure (Qdrant, Ollama, encryption key). The rest is automatic!
|
||||
|
||||
# For more advanced configuration, see env.sample
|
||||
@@ -1,37 +0,0 @@
|
||||
# ============================================
|
||||
# SINGLE-USER BASICAUTH QUICK START
|
||||
# ============================================
|
||||
# Simplest deployment mode - one user, credentials in environment
|
||||
# Use for: Personal instances, local development, testing
|
||||
#
|
||||
# Copy this file to .env and fill in your credentials
|
||||
|
||||
# ===== REQUIRED SETTINGS =====
|
||||
# Your Nextcloud instance URL (without trailing slash)
|
||||
NEXTCLOUD_HOST=http://localhost:8080
|
||||
|
||||
# Your Nextcloud credentials
|
||||
NEXTCLOUD_USERNAME=admin
|
||||
NEXTCLOUD_PASSWORD=password
|
||||
|
||||
# ===== OPTIONAL: EXPLICIT MODE DECLARATION =====
|
||||
# Recommended to avoid ambiguity
|
||||
MCP_DEPLOYMENT_MODE=single_user_basic
|
||||
|
||||
# ===== OPTIONAL: SEMANTIC SEARCH =====
|
||||
# Uncomment to enable AI-powered semantic search
|
||||
# Requires: Qdrant + embedding provider (Ollama or Bedrock)
|
||||
#
|
||||
#ENABLE_SEMANTIC_SEARCH=true
|
||||
#QDRANT_LOCATION=:memory:
|
||||
#OLLAMA_BASE_URL=http://ollama:11434
|
||||
#OLLAMA_EMBEDDING_MODEL=nomic-embed-text
|
||||
|
||||
# ===== OPTIONAL: DOCUMENT PROCESSING =====
|
||||
# Extract text from PDFs, images, DOCX for semantic search
|
||||
#ENABLE_DOCUMENT_PROCESSING=true
|
||||
#ENABLE_UNSTRUCTURED=true
|
||||
#UNSTRUCTURED_API_URL=http://unstructured:8000
|
||||
|
||||
# That's it! Single-user mode is the simplest to configure.
|
||||
# For more options, see env.sample
|
||||
@@ -1,133 +0,0 @@
|
||||
"""Alembic environment configuration for nextcloud-mcp-server.
|
||||
|
||||
This module configures how Alembic runs database migrations for the
|
||||
token storage database. It supports both online and offline migration modes.
|
||||
|
||||
Uses anyio for async operations, consistent with the project's async patterns.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import anyio
|
||||
from sqlalchemy import pool
|
||||
from sqlalchemy.engine import Connection
|
||||
from sqlalchemy.ext.asyncio import async_engine_from_config
|
||||
|
||||
from alembic import context
|
||||
|
||||
# Configure logging
|
||||
logger = logging.getLogger("alembic.env")
|
||||
|
||||
# This is the Alembic Config object, which provides
|
||||
# access to the values within the .ini file in use.
|
||||
config = context.config
|
||||
|
||||
# Update script location to point to package location
|
||||
# This allows alembic to find migrations when installed in site-packages
|
||||
script_location = Path(__file__).parent
|
||||
config.set_main_option("script_location", str(script_location))
|
||||
|
||||
# We don't use SQLAlchemy models, so target_metadata is None
|
||||
# Migrations will be written manually using op.execute() for raw SQL
|
||||
target_metadata = None
|
||||
|
||||
|
||||
def get_database_url() -> str:
|
||||
"""
|
||||
Get the database URL from Alembic config or environment.
|
||||
|
||||
The URL can be set in alembic.ini or passed via -x database_url=...
|
||||
when running Alembic commands.
|
||||
|
||||
Returns:
|
||||
Database URL (SQLite URL format)
|
||||
"""
|
||||
# Check if URL is passed via -x database_url=...
|
||||
url = context.get_x_argument(as_dictionary=True).get("database_url")
|
||||
|
||||
if not url:
|
||||
# Fall back to alembic.ini configuration
|
||||
url = config.get_main_option("sqlalchemy.url")
|
||||
|
||||
if not url:
|
||||
# Default to /app/data/tokens.db for Docker deployments
|
||||
db_path = Path("/app/data/tokens.db")
|
||||
url = f"sqlite+aiosqlite:///{db_path}"
|
||||
logger.warning(
|
||||
f"No database URL configured, using default: {url}. "
|
||||
"Set sqlalchemy.url in alembic.ini or pass -x database_url=..."
|
||||
)
|
||||
|
||||
return url
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
"""Run migrations in 'offline' mode.
|
||||
|
||||
This configures the context with just a URL and not an Engine,
|
||||
though an Engine is acceptable here as well. By skipping the
|
||||
Engine creation we don't even need a DBAPI to be available.
|
||||
|
||||
Calls to context.execute() here emit the given string to the
|
||||
script output.
|
||||
|
||||
This mode is useful for generating SQL scripts without database access.
|
||||
"""
|
||||
url = get_database_url()
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata,
|
||||
literal_binds=True,
|
||||
dialect_opts={"paramstyle": "named"},
|
||||
)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def do_run_migrations(connection: Connection) -> None:
|
||||
"""Execute migrations within a database connection."""
|
||||
context.configure(connection=connection, target_metadata=target_metadata)
|
||||
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
async def run_async_migrations() -> None:
|
||||
"""Run migrations in 'online' mode with async support.
|
||||
|
||||
In this scenario we create an async Engine and associate
|
||||
a connection with the context.
|
||||
"""
|
||||
# Get database URL and update config
|
||||
url = get_database_url()
|
||||
config.set_main_option("sqlalchemy.url", url)
|
||||
|
||||
# Create async engine
|
||||
connectable = async_engine_from_config(
|
||||
config.get_section(config.config_ini_section, {}),
|
||||
prefix="sqlalchemy.",
|
||||
poolclass=pool.NullPool, # Don't pool connections for migrations
|
||||
)
|
||||
|
||||
async with connectable.connect() as connection:
|
||||
await connection.run_sync(do_run_migrations)
|
||||
|
||||
await connectable.dispose()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
|
||||
"""Run migrations in 'online' mode.
|
||||
|
||||
This function is called from storage.py's initialize() method via
|
||||
anyio.to_thread.run_sync(), so it always runs in a worker thread
|
||||
with its own event loop. We can safely use anyio.run() here.
|
||||
"""
|
||||
anyio.run(run_async_migrations)
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
run_migrations_online()
|
||||
@@ -1,185 +0,0 @@
|
||||
"""Initial schema for token storage database
|
||||
|
||||
This migration creates the initial database schema including:
|
||||
- refresh_tokens: OAuth refresh tokens and user profiles
|
||||
- audit_logs: Audit trail for security events
|
||||
- oauth_clients: OAuth client credentials (DCR)
|
||||
- oauth_sessions: OAuth flow session state (ADR-004 Progressive Consent)
|
||||
- registered_webhooks: Webhook registration tracking (both OAuth and BasicAuth)
|
||||
- schema_version: Legacy schema version tracking (deprecated, use alembic_version)
|
||||
|
||||
Revision ID: 001
|
||||
Revises:
|
||||
Create Date: 2025-12-17 22:00:00.000000
|
||||
|
||||
"""
|
||||
|
||||
from alembic import op
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision = "001"
|
||||
down_revision = None
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
"""Create initial database schema."""
|
||||
|
||||
# Refresh tokens table (OAuth mode only, for background jobs)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS refresh_tokens (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
encrypted_token BLOB NOT NULL,
|
||||
expires_at INTEGER,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid',
|
||||
token_audience TEXT DEFAULT 'nextcloud',
|
||||
provisioned_at INTEGER,
|
||||
provisioning_client_id TEXT,
|
||||
scopes TEXT,
|
||||
-- Browser session profile cache
|
||||
user_profile TEXT,
|
||||
profile_cached_at INTEGER
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Audit logs table (both OAuth and BasicAuth modes)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS audit_logs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
user_id TEXT NOT NULL,
|
||||
resource_type TEXT,
|
||||
resource_id TEXT,
|
||||
auth_method TEXT,
|
||||
hostname TEXT
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Index on audit logs for efficient queries
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_audit_user_timestamp
|
||||
ON audit_logs(user_id, timestamp)
|
||||
"""
|
||||
)
|
||||
|
||||
# OAuth client credentials storage (OAuth mode only)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_clients (
|
||||
id INTEGER PRIMARY KEY,
|
||||
client_id TEXT UNIQUE NOT NULL,
|
||||
encrypted_client_secret BLOB NOT NULL,
|
||||
client_id_issued_at INTEGER NOT NULL,
|
||||
client_secret_expires_at INTEGER NOT NULL,
|
||||
redirect_uris TEXT NOT NULL,
|
||||
encrypted_registration_access_token BLOB,
|
||||
registration_client_uri TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# OAuth flow sessions (ADR-004 Progressive Consent)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_sessions (
|
||||
session_id TEXT PRIMARY KEY,
|
||||
client_id TEXT,
|
||||
client_redirect_uri TEXT NOT NULL,
|
||||
state TEXT,
|
||||
code_challenge TEXT,
|
||||
code_challenge_method TEXT,
|
||||
mcp_authorization_code TEXT UNIQUE,
|
||||
idp_access_token TEXT,
|
||||
idp_refresh_token TEXT,
|
||||
user_id TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
expires_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid',
|
||||
requested_scopes TEXT,
|
||||
granted_scopes TEXT,
|
||||
is_provisioning BOOLEAN DEFAULT FALSE
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Index for MCP authorization code lookups
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_oauth_sessions_mcp_code
|
||||
ON oauth_sessions(mcp_authorization_code)
|
||||
"""
|
||||
)
|
||||
|
||||
# Legacy schema version tracking table
|
||||
# NOTE: This is deprecated in favor of Alembic's alembic_version table
|
||||
# Kept for backward compatibility with pre-Alembic databases
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
version INTEGER PRIMARY KEY,
|
||||
applied_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Registered webhooks tracking (both BasicAuth and OAuth modes)
|
||||
op.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS registered_webhooks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
webhook_id INTEGER NOT NULL UNIQUE,
|
||||
preset_id TEXT NOT NULL,
|
||||
created_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Indexes for efficient webhook queries
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_webhooks_preset
|
||||
ON registered_webhooks(preset_id)
|
||||
"""
|
||||
)
|
||||
|
||||
op.execute(
|
||||
"""
|
||||
CREATE INDEX IF NOT EXISTS idx_webhooks_created
|
||||
ON registered_webhooks(created_at)
|
||||
"""
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
"""Drop all tables and indexes.
|
||||
|
||||
WARNING: This will destroy all data in the database!
|
||||
Use with extreme caution.
|
||||
"""
|
||||
|
||||
# Drop indexes first
|
||||
op.execute("DROP INDEX IF EXISTS idx_webhooks_created")
|
||||
op.execute("DROP INDEX IF EXISTS idx_webhooks_preset")
|
||||
op.execute("DROP INDEX IF EXISTS idx_oauth_sessions_mcp_code")
|
||||
op.execute("DROP INDEX IF EXISTS idx_audit_user_timestamp")
|
||||
|
||||
# Drop tables
|
||||
op.execute("DROP TABLE IF EXISTS registered_webhooks")
|
||||
op.execute("DROP TABLE IF EXISTS schema_version")
|
||||
op.execute("DROP TABLE IF EXISTS oauth_sessions")
|
||||
op.execute("DROP TABLE IF EXISTS oauth_clients")
|
||||
op.execute("DROP TABLE IF EXISTS audit_logs")
|
||||
op.execute("DROP TABLE IF EXISTS refresh_tokens")
|
||||
@@ -1,6 +0,0 @@
|
||||
"""Management API for Nextcloud MCP Server.
|
||||
|
||||
Provides REST endpoints for the Nextcloud PHP app to query server status,
|
||||
user sessions, and vector sync metrics. All endpoints use OAuth bearer token
|
||||
authentication via the UnifiedTokenVerifier.
|
||||
"""
|
||||
File diff suppressed because it is too large
Load Diff
+334
-1231
File diff suppressed because it is too large
Load Diff
@@ -1,152 +0,0 @@
|
||||
"""
|
||||
Client for querying Astrolabe Management API for background sync credentials.
|
||||
|
||||
This client uses OAuth client credentials flow to authenticate to Nextcloud
|
||||
and retrieve user app passwords for background sync operations.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AstrolabeClient:
|
||||
"""Client for querying Astrolabe API for background sync credentials.
|
||||
|
||||
Uses OAuth client credentials flow to authenticate as the MCP server
|
||||
and retrieve user app passwords that are stored in Nextcloud.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
nextcloud_host: str,
|
||||
client_id: str,
|
||||
client_secret: str,
|
||||
):
|
||||
"""
|
||||
Initialize Astrolabe client.
|
||||
|
||||
Args:
|
||||
nextcloud_host: Nextcloud base URL (e.g., https://cloud.example.com)
|
||||
client_id: OAuth client ID for MCP server
|
||||
client_secret: OAuth client secret
|
||||
"""
|
||||
self.nextcloud_host = nextcloud_host.rstrip("/")
|
||||
self.client_id = client_id
|
||||
self.client_secret = client_secret
|
||||
self._token_cache: Optional[dict] = None # {access_token, expires_at}
|
||||
|
||||
async def get_access_token(self) -> str:
|
||||
"""
|
||||
Get access token using OAuth client credentials flow.
|
||||
|
||||
Tokens are cached with 1-minute early refresh to avoid expiration.
|
||||
|
||||
Returns:
|
||||
Access token string
|
||||
|
||||
Raises:
|
||||
httpx.HTTPError: If token request fails
|
||||
"""
|
||||
# Check cache
|
||||
if self._token_cache and time.time() < self._token_cache["expires_at"]:
|
||||
logger.debug("Using cached OAuth token for Astrolabe API")
|
||||
return self._token_cache["access_token"]
|
||||
|
||||
# Discover token endpoint
|
||||
discovery_url = f"{self.nextcloud_host}/.well-known/openid-configuration"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
logger.debug(f"Discovering token endpoint from {discovery_url}")
|
||||
discovery_resp = await client.get(discovery_url)
|
||||
discovery_resp.raise_for_status()
|
||||
token_endpoint = discovery_resp.json()["token_endpoint"]
|
||||
|
||||
logger.debug(f"Requesting client credentials token from {token_endpoint}")
|
||||
|
||||
# Request token using client credentials grant
|
||||
token_resp = await client.post(
|
||||
token_endpoint,
|
||||
data={
|
||||
"grant_type": "client_credentials",
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"scope": "openid", # Minimal scope
|
||||
},
|
||||
)
|
||||
token_resp.raise_for_status()
|
||||
data = token_resp.json()
|
||||
|
||||
# Cache with 1-minute early refresh
|
||||
expires_in = data.get("expires_in", 3600)
|
||||
self._token_cache = {
|
||||
"access_token": data["access_token"],
|
||||
"expires_at": time.time() + expires_in - 60,
|
||||
}
|
||||
|
||||
logger.info(f"Obtained Astrolabe API token (expires in {expires_in}s)")
|
||||
return data["access_token"]
|
||||
|
||||
async def get_user_app_password(self, user_id: str) -> Optional[str]:
|
||||
"""
|
||||
Retrieve user's app password for background sync.
|
||||
|
||||
Args:
|
||||
user_id: Nextcloud user ID
|
||||
|
||||
Returns:
|
||||
App password string, or None if user hasn't provisioned
|
||||
|
||||
Raises:
|
||||
httpx.HTTPError: If API request fails (except 404)
|
||||
"""
|
||||
token = await self.get_access_token()
|
||||
url = f"{self.nextcloud_host}/apps/astrolabe/api/v1/background-sync/credentials/{user_id}"
|
||||
|
||||
async with httpx.AsyncClient() as client:
|
||||
logger.debug(f"Retrieving app password for user: {user_id}")
|
||||
|
||||
response = await client.get(
|
||||
url,
|
||||
headers={"Authorization": f"Bearer {token}"},
|
||||
timeout=10.0,
|
||||
)
|
||||
|
||||
if response.status_code == 404:
|
||||
logger.debug(f"No app password configured for user: {user_id}")
|
||||
return None
|
||||
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
logger.info(
|
||||
f"Retrieved app password for user: {user_id} (type: {data.get('credential_type')})"
|
||||
)
|
||||
return data.get("app_password")
|
||||
|
||||
async def get_background_sync_status(self, user_id: str) -> dict:
|
||||
"""
|
||||
Get background sync status for a user.
|
||||
|
||||
Args:
|
||||
user_id: Nextcloud user ID
|
||||
|
||||
Returns:
|
||||
Dict with keys: has_access, credential_type, provisioned_at
|
||||
|
||||
Raises:
|
||||
httpx.HTTPError: If API request fails
|
||||
"""
|
||||
# For now, check if app password exists
|
||||
# In the future, this could query a dedicated status endpoint
|
||||
app_password = await self.get_user_app_password(user_id)
|
||||
|
||||
return {
|
||||
"has_access": app_password is not None,
|
||||
"credential_type": "app_password" if app_password else None,
|
||||
"provisioned_at": None, # TODO: Get from API if available
|
||||
}
|
||||
@@ -24,26 +24,6 @@ from nextcloud_mcp_server.auth.userinfo_routes import (
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _should_use_secure_cookies() -> bool:
|
||||
"""Determine if cookies should have secure flag.
|
||||
|
||||
Checks COOKIE_SECURE env var first, then auto-detects from NEXTCLOUD_HOST.
|
||||
|
||||
Returns:
|
||||
True if cookies should be secure (HTTPS), False otherwise
|
||||
"""
|
||||
# Explicit configuration takes precedence
|
||||
explicit = os.getenv("COOKIE_SECURE", "").lower()
|
||||
if explicit == "true":
|
||||
return True
|
||||
if explicit == "false":
|
||||
return False
|
||||
|
||||
# Auto-detect from NEXTCLOUD_HOST protocol
|
||||
nextcloud_host = os.getenv("NEXTCLOUD_HOST", "")
|
||||
return nextcloud_host.startswith("https://")
|
||||
|
||||
|
||||
async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
"""Browser OAuth login endpoint - redirects to IdP for authentication.
|
||||
|
||||
@@ -70,10 +50,6 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
logger.info(f"oauth_login called - client_id: {oauth_config.get('client_id')}")
|
||||
logger.info(f"oauth_login called - oauth_client: {oauth_client is not None}")
|
||||
|
||||
# Get redirect URL from query params (default to /app)
|
||||
next_url = request.query_params.get("next", "/app")
|
||||
logger.info(f"oauth_login - next_url: {next_url}")
|
||||
|
||||
# Generate state for CSRF protection
|
||||
state = secrets.token_urlsafe(32)
|
||||
|
||||
@@ -95,7 +71,7 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
await storage.store_oauth_session(
|
||||
session_id=state, # Use state as session ID
|
||||
client_id="browser-ui",
|
||||
client_redirect_uri=next_url, # Store the redirect URL for after auth
|
||||
client_redirect_uri="/app",
|
||||
state=state,
|
||||
code_challenge=code_challenge,
|
||||
code_challenge_method="S256",
|
||||
@@ -109,11 +85,6 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
if not oauth_client.authorization_endpoint:
|
||||
await oauth_client.discover()
|
||||
|
||||
# Get Nextcloud resource URI for audience (background sync needs Nextcloud-scoped tokens)
|
||||
nextcloud_resource_uri = oauth_config.get(
|
||||
"nextcloud_resource_uri", oauth_config.get("nextcloud_host")
|
||||
)
|
||||
|
||||
idp_params = {
|
||||
"client_id": oauth_client.client_id,
|
||||
"redirect_uri": callback_uri,
|
||||
@@ -123,7 +94,6 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
"code_challenge": code_challenge,
|
||||
"code_challenge_method": "S256",
|
||||
"prompt": "consent", # Ensure refresh token
|
||||
"resource": nextcloud_resource_uri, # Request tokens for Nextcloud API access
|
||||
}
|
||||
|
||||
auth_url = f"{oauth_client.authorization_endpoint}?{urlencode(idp_params)}"
|
||||
@@ -161,11 +131,6 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
f"{public_parsed.scheme}://{public_parsed.netloc}{auth_parsed.path}"
|
||||
)
|
||||
|
||||
# Get Nextcloud resource URI for audience (background sync needs Nextcloud-scoped tokens)
|
||||
nextcloud_resource_uri = oauth_config.get(
|
||||
"nextcloud_resource_uri", oauth_config.get("nextcloud_host")
|
||||
)
|
||||
|
||||
idp_params = {
|
||||
"client_id": oauth_config["client_id"],
|
||||
"redirect_uri": callback_uri,
|
||||
@@ -175,7 +140,6 @@ async def oauth_login(request: Request) -> RedirectResponse | JSONResponse:
|
||||
"code_challenge": code_challenge,
|
||||
"code_challenge_method": "S256",
|
||||
"prompt": "consent", # Ensure refresh token
|
||||
"resource": nextcloud_resource_uri, # Request tokens for Nextcloud API access
|
||||
}
|
||||
|
||||
# Debug: Log full parameters
|
||||
@@ -250,15 +214,12 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
|
||||
oauth_client = oauth_ctx["oauth_client"]
|
||||
oauth_config = oauth_ctx["config"]
|
||||
|
||||
# Retrieve code_verifier and redirect URL from session storage
|
||||
# Retrieve code_verifier from session storage (PKCE required for all modes)
|
||||
code_verifier = ""
|
||||
next_url = "/app" # Default redirect
|
||||
oauth_session = await storage.get_oauth_session(state)
|
||||
if oauth_session:
|
||||
# code_verifier was stored in mcp_authorization_code field
|
||||
code_verifier = oauth_session.get("mcp_authorization_code", "")
|
||||
# next_url was stored in client_redirect_uri field
|
||||
next_url = oauth_session.get("client_redirect_uri", "/app")
|
||||
# Clean up the temporary session
|
||||
# Note: We don't have delete_oauth_session method, but it will expire after TTL
|
||||
|
||||
@@ -377,35 +338,16 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
|
||||
user_id = f"user-{secrets.token_hex(8)}"
|
||||
username = "unknown"
|
||||
|
||||
# Calculate refresh token expiration from token response
|
||||
refresh_expires_in = token_data.get("refresh_expires_in")
|
||||
refresh_expires_at = None
|
||||
if refresh_expires_in:
|
||||
import time
|
||||
|
||||
refresh_expires_at = int(time.time()) + refresh_expires_in
|
||||
logger.info(
|
||||
f"Refresh token expires in {refresh_expires_in}s (at timestamp {refresh_expires_at})"
|
||||
)
|
||||
|
||||
# Extract granted scopes
|
||||
granted_scopes = (
|
||||
token_data.get("scope", "").split() if token_data.get("scope") else None
|
||||
)
|
||||
|
||||
# Store refresh token (for background jobs ONLY)
|
||||
if refresh_token:
|
||||
logger.info(f"Storing refresh token for user_id: {user_id}")
|
||||
logger.info(f" State parameter (provisioning_client_id): {state[:16]}...")
|
||||
logger.info(f" Granted scopes: {granted_scopes}")
|
||||
logger.info(f" Expires at: {refresh_expires_at}")
|
||||
await storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=refresh_token,
|
||||
expires_at=refresh_expires_at,
|
||||
expires_at=None,
|
||||
flow_type="browser", # Browser-based login flow
|
||||
provisioning_client_id=state, # Store state for unified session lookup
|
||||
scopes=granted_scopes,
|
||||
)
|
||||
logger.info(f"✓ Refresh token stored successfully for user_id: {user_id}")
|
||||
logger.info(
|
||||
@@ -441,14 +383,13 @@ async def oauth_login_callback(request: Request) -> RedirectResponse | HTMLRespo
|
||||
# Continue anyway - profile cache is optional for browser UI
|
||||
|
||||
# Create response and set session cookie
|
||||
# Redirect to stored next_url (from OAuth session) or /app as default
|
||||
response = RedirectResponse(next_url, status_code=302)
|
||||
response = RedirectResponse("/app", status_code=302)
|
||||
response.set_cookie(
|
||||
key="mcp_session",
|
||||
value=user_id,
|
||||
max_age=86400 * 30, # 30 days
|
||||
httponly=True,
|
||||
secure=_should_use_secure_cookies(),
|
||||
secure=False, # Set to True in production with HTTPS
|
||||
samesite="lax",
|
||||
)
|
||||
|
||||
|
||||
@@ -517,23 +517,12 @@ async def oauth_callback_nextcloud(request: Request):
|
||||
token_data.get("scope", "").split() if token_data.get("scope") else None
|
||||
)
|
||||
|
||||
# Calculate refresh token expiration from token response
|
||||
refresh_expires_in = token_data.get("refresh_expires_in")
|
||||
refresh_expires_at = None
|
||||
if refresh_expires_in:
|
||||
import time
|
||||
|
||||
refresh_expires_at = int(time.time()) + refresh_expires_in
|
||||
logger.info(f" refresh_expires_in: {refresh_expires_in}s")
|
||||
logger.info(f" refresh_expires_at: {refresh_expires_at}")
|
||||
|
||||
logger.info("Storing refresh token:")
|
||||
logger.info(f" user_id: {user_id}")
|
||||
logger.info(" flow_type: flow2")
|
||||
logger.info(" token_audience: nextcloud")
|
||||
logger.info(f" provisioning_client_id: {state[:16]}...")
|
||||
logger.info(f" scopes: {granted_scopes}")
|
||||
logger.info(f" expires_at: {refresh_expires_at}")
|
||||
|
||||
await storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
@@ -542,7 +531,7 @@ async def oauth_callback_nextcloud(request: Request):
|
||||
token_audience="nextcloud",
|
||||
provisioning_client_id=state, # Store which client initiated provisioning
|
||||
scopes=granted_scopes,
|
||||
expires_at=refresh_expires_at,
|
||||
expires_at=None, # Refresh tokens typically don't expire
|
||||
)
|
||||
logger.info(f"✓ Stored Flow 2 master refresh token for user {user_id}")
|
||||
logger.info("=" * 60)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
"""Scope-based authorization for MCP tools."""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from functools import wraps
|
||||
from typing import Any, Callable
|
||||
|
||||
@@ -130,12 +131,9 @@ def require_scopes(*required_scopes: str):
|
||||
required_scopes_set = set(required_scopes)
|
||||
|
||||
# Check if offline access is enabled
|
||||
# Use settings.enable_offline_access which handles both ENABLE_BACKGROUND_OPERATIONS (new)
|
||||
# and ENABLE_OFFLINE_ACCESS (deprecated) environment variables
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
enable_offline_access = settings.enable_offline_access
|
||||
enable_offline_access = (
|
||||
os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() == "true"
|
||||
)
|
||||
|
||||
# In offline access mode, check if Nextcloud scopes require provisioning
|
||||
if enable_offline_access:
|
||||
|
||||
@@ -190,30 +190,3 @@
|
||||
color: var(--color-text-maxcontrast);
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
/* PDF highlighted image styles */
|
||||
.chunk-image-container {
|
||||
margin-bottom: 16px;
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: var(--border-radius);
|
||||
overflow: hidden;
|
||||
background: #fff;
|
||||
}
|
||||
.chunk-image-header {
|
||||
background: var(--color-background-dark);
|
||||
padding: 8px 12px;
|
||||
font-size: 12px;
|
||||
font-weight: 500;
|
||||
color: var(--color-text-maxcontrast);
|
||||
border-bottom: 1px solid var(--color-border);
|
||||
font-family: var(--font-face);
|
||||
}
|
||||
.chunk-highlighted-image {
|
||||
display: block;
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
cursor: zoom-in;
|
||||
}
|
||||
.chunk-highlighted-image:hover {
|
||||
opacity: 0.95;
|
||||
}
|
||||
|
||||
@@ -201,15 +201,8 @@ function vizApp() {
|
||||
return `${baseUrl}/apps/calendar`;
|
||||
case 'contact':
|
||||
return `${baseUrl}/apps/contacts`;
|
||||
case 'deck_card':
|
||||
// URL pattern: /apps/deck/board/:boardId/card/:cardId
|
||||
if (result.metadata && result.metadata.board_id) {
|
||||
return `${baseUrl}/apps/deck/board/${result.metadata.board_id}/card/${result.id}`;
|
||||
}
|
||||
// Fallback if board_id not available
|
||||
case 'deck':
|
||||
return `${baseUrl}/apps/deck`;
|
||||
case 'news_item':
|
||||
return `${baseUrl}/apps/news/item/${result.id}`;
|
||||
default:
|
||||
return `${baseUrl}`;
|
||||
}
|
||||
@@ -224,7 +217,7 @@ function vizApp() {
|
||||
},
|
||||
|
||||
async toggleChunk(result) {
|
||||
const resultKey = `${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`;
|
||||
const resultKey = `${result.doc_type}_${result.id}`;
|
||||
|
||||
if (this.isChunkExpanded(resultKey)) {
|
||||
delete this.expandedChunks[resultKey];
|
||||
|
||||
@@ -117,14 +117,7 @@ class RefreshTokenStorage:
|
||||
return cls(db_path=db_path, encryption_key=encryption_key)
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""
|
||||
Initialize database schema using Alembic migrations.
|
||||
|
||||
This method handles three scenarios:
|
||||
1. New database: Run migrations from scratch
|
||||
2. Pre-Alembic database: Stamp with initial revision (no changes)
|
||||
3. Alembic-managed database: Upgrade to latest version
|
||||
"""
|
||||
"""Initialize database schema"""
|
||||
if self._initialized:
|
||||
return
|
||||
|
||||
@@ -132,59 +125,137 @@ class RefreshTokenStorage:
|
||||
db_dir = Path(self.db_path).parent
|
||||
db_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Set restrictive permissions on database file if it exists
|
||||
# Set restrictive permissions on database file
|
||||
if Path(self.db_path).exists():
|
||||
os.chmod(self.db_path, 0o600)
|
||||
|
||||
# Check database state and run appropriate migration strategy
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
# Check if database is managed by Alembic
|
||||
cursor = await db.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='alembic_version'"
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS refresh_tokens (
|
||||
user_id TEXT PRIMARY KEY,
|
||||
encrypted_token BLOB NOT NULL,
|
||||
expires_at INTEGER,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid', -- 'hybrid', 'flow1', 'flow2'
|
||||
token_audience TEXT DEFAULT 'nextcloud', -- 'mcp-server' or 'nextcloud'
|
||||
provisioned_at INTEGER, -- When Flow 2 was completed
|
||||
provisioning_client_id TEXT, -- Which MCP client initiated Flow 1
|
||||
scopes TEXT, -- JSON array of granted scopes
|
||||
-- Browser session profile cache
|
||||
user_profile TEXT, -- JSON cache of IdP user profile (for browser UI only)
|
||||
profile_cached_at INTEGER -- When profile was last cached
|
||||
)
|
||||
"""
|
||||
)
|
||||
has_alembic = await cursor.fetchone() is not None
|
||||
|
||||
if not has_alembic:
|
||||
# Check if this is a pre-Alembic database with existing schema
|
||||
cursor = await db.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='refresh_tokens'"
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS audit_logs (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp INTEGER NOT NULL,
|
||||
event TEXT NOT NULL,
|
||||
user_id TEXT NOT NULL,
|
||||
resource_type TEXT,
|
||||
resource_id TEXT,
|
||||
auth_method TEXT,
|
||||
hostname TEXT
|
||||
)
|
||||
has_schema = await cursor.fetchone() is not None
|
||||
"""
|
||||
)
|
||||
|
||||
if has_schema:
|
||||
logger.info(
|
||||
f"Detected pre-Alembic database at {self.db_path}, "
|
||||
"stamping with initial revision"
|
||||
)
|
||||
else:
|
||||
logger.info(
|
||||
f"Initializing new database at {self.db_path} with migrations"
|
||||
)
|
||||
# Create index on audit logs for efficient queries
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_audit_user_timestamp "
|
||||
"ON audit_logs(user_id, timestamp)"
|
||||
)
|
||||
|
||||
# Run migrations in a worker thread using anyio.to_thread
|
||||
# This allows Alembic to run its own async operations in a separate context
|
||||
from anyio import to_thread
|
||||
|
||||
from nextcloud_mcp_server.migrations import stamp_database, upgrade_database
|
||||
|
||||
if not has_alembic:
|
||||
if has_schema:
|
||||
# Stamp existing database without running migrations
|
||||
await to_thread.run_sync(stamp_database, self.db_path, "001")
|
||||
logger.info(
|
||||
"Pre-Alembic database stamped successfully. "
|
||||
"Future schema changes will use migrations."
|
||||
# OAuth client credentials storage
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_clients (
|
||||
id INTEGER PRIMARY KEY,
|
||||
client_id TEXT UNIQUE NOT NULL,
|
||||
encrypted_client_secret BLOB NOT NULL,
|
||||
client_id_issued_at INTEGER NOT NULL,
|
||||
client_secret_expires_at INTEGER NOT NULL,
|
||||
redirect_uris TEXT NOT NULL,
|
||||
encrypted_registration_access_token BLOB,
|
||||
registration_client_uri TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
)
|
||||
else:
|
||||
# New database - run migrations
|
||||
await to_thread.run_sync(upgrade_database, self.db_path, "head")
|
||||
logger.info("Database initialized with migrations")
|
||||
else:
|
||||
# Alembic-managed database - upgrade to latest
|
||||
await to_thread.run_sync(upgrade_database, self.db_path, "head")
|
||||
logger.info("Database upgraded to latest version")
|
||||
"""
|
||||
)
|
||||
|
||||
# Set restrictive permissions after initialization
|
||||
# OAuth flow sessions (ADR-004 Progressive Consent)
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS oauth_sessions (
|
||||
session_id TEXT PRIMARY KEY,
|
||||
client_id TEXT,
|
||||
client_redirect_uri TEXT NOT NULL,
|
||||
state TEXT,
|
||||
code_challenge TEXT,
|
||||
code_challenge_method TEXT,
|
||||
mcp_authorization_code TEXT UNIQUE,
|
||||
idp_access_token TEXT,
|
||||
idp_refresh_token TEXT,
|
||||
user_id TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
expires_at INTEGER NOT NULL,
|
||||
-- ADR-004 Progressive Consent fields
|
||||
flow_type TEXT DEFAULT 'hybrid', -- 'hybrid', 'flow1', 'flow2'
|
||||
requested_scopes TEXT, -- JSON array of requested scopes
|
||||
granted_scopes TEXT, -- JSON array of granted scopes
|
||||
is_provisioning BOOLEAN DEFAULT FALSE -- True if this is a Flow 2 provisioning session
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Create index for MCP authorization code lookups
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_oauth_sessions_mcp_code "
|
||||
"ON oauth_sessions(mcp_authorization_code)"
|
||||
)
|
||||
|
||||
# Schema version tracking
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS schema_version (
|
||||
version INTEGER PRIMARY KEY,
|
||||
applied_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Registered webhooks tracking (both BasicAuth and OAuth modes)
|
||||
await db.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS registered_webhooks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
webhook_id INTEGER NOT NULL UNIQUE,
|
||||
preset_id TEXT NOT NULL,
|
||||
created_at REAL NOT NULL
|
||||
)
|
||||
"""
|
||||
)
|
||||
|
||||
# Create indexes for efficient webhook queries
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_webhooks_preset "
|
||||
"ON registered_webhooks(preset_id)"
|
||||
)
|
||||
await db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_webhooks_created "
|
||||
"ON registered_webhooks(created_at)"
|
||||
)
|
||||
|
||||
await db.commit()
|
||||
|
||||
# Set restrictive permissions after creation
|
||||
os.chmod(self.db_path, 0o600)
|
||||
|
||||
self._initialized = True
|
||||
@@ -216,8 +287,6 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
encrypted_token = self.cipher.encrypt(refresh_token.encode())
|
||||
now = int(time.time())
|
||||
scopes_json = json.dumps(scopes) if scopes else None
|
||||
@@ -363,9 +432,6 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
start_time = time.time()
|
||||
try:
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
@@ -450,9 +516,6 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
async with db.execute(
|
||||
"""
|
||||
@@ -624,9 +687,6 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
# Encrypt sensitive data
|
||||
encrypted_secret = self.cipher.encrypt(client_secret.encode())
|
||||
encrypted_reg_token = (
|
||||
@@ -697,9 +757,6 @@ class RefreshTokenStorage:
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Type narrowing: cipher is set after initialize()
|
||||
assert self.cipher is not None
|
||||
|
||||
async with aiosqlite.connect(self.db_path) as db:
|
||||
async with db.execute(
|
||||
"""
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"></script>
|
||||
|
||||
<!-- Plotly.js for vector visualization -->
|
||||
<script src="https://cdn.plot.ly/plotly-3.3.0.min.js"></script>
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/plotly.js/3.1.1/plotly.min.js"></script>
|
||||
|
||||
<!-- Vector Viz static assets -->
|
||||
<link rel="stylesheet" href="/app/static/vector-viz.css">
|
||||
|
||||
@@ -65,12 +65,8 @@
|
||||
<span>Contacts</span>
|
||||
</label>
|
||||
<label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
|
||||
<input type="checkbox" x-model="docTypes" value="deck_card" style="margin-right: 4px;">
|
||||
<span>Deck Cards</span>
|
||||
</label>
|
||||
<label style="display: flex; align-items: center; cursor: pointer; font-weight: normal;">
|
||||
<input type="checkbox" x-model="docTypes" value="news_item" style="margin-right: 4px;">
|
||||
<span>News</span>
|
||||
<input type="checkbox" x-model="docTypes" value="deck" style="margin-right: 4px;">
|
||||
<span>Deck</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
@@ -121,13 +117,12 @@
|
||||
|
||||
<template x-if="!loading && results.length > 0">
|
||||
<div x-transition.opacity.duration.200ms>
|
||||
<template x-for="result in results" :key="`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`">
|
||||
<template x-for="result in results" :key="result.id">
|
||||
<div style="padding: 12px; border-bottom: 1px solid #eee;">
|
||||
<a :href="getNextcloudUrl(result)" target="_blank" style="font-weight: 500; color: #0066cc; text-decoration: none;">
|
||||
<span x-text="result.title"></span>
|
||||
</a>
|
||||
<div style="font-size: 14px; color: #666; margin-top: 4px;"
|
||||
x-text="result.excerpt.length > 200 ? result.excerpt.substring(0, 200) + '...' : result.excerpt"></div>
|
||||
<div style="font-size: 14px; color: #666; margin-top: 4px;" x-text="result.excerpt"></div>
|
||||
<div style="font-size: 12px; color: #999; margin-top: 4px;">
|
||||
Raw Score: <span x-text="result.original_score.toFixed(3)"></span>
|
||||
(<span x-text="(result.score * 100).toFixed(0)"></span>% relative) |
|
||||
@@ -139,36 +134,22 @@
|
||||
<button
|
||||
class="chunk-toggle-btn"
|
||||
@click="toggleChunk(result)"
|
||||
x-text="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`) ? 'Hide Chunk' : 'Show Chunk'"
|
||||
x-text="isChunkExpanded(`${result.doc_type}_${result.id}`) ? 'Hide Chunk' : 'Show Chunk'"
|
||||
></button>
|
||||
</template>
|
||||
|
||||
<!-- Chunk context (expanded inline) -->
|
||||
<template x-if="isChunkExpanded(`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`)">
|
||||
<template x-if="isChunkExpanded(`${result.doc_type}_${result.id}`)">
|
||||
<div class="chunk-context" x-transition.opacity.duration.200ms>
|
||||
<template x-if="chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
|
||||
<template x-if="chunkLoading[`${result.doc_type}_${result.id}`]">
|
||||
<div style="color: #666; font-style: italic;">Loading chunk...</div>
|
||||
</template>
|
||||
<template x-if="!chunkLoading[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]">
|
||||
<template x-if="!chunkLoading[`${result.doc_type}_${result.id}`]">
|
||||
<div>
|
||||
<!-- Highlighted page image for PDFs -->
|
||||
<template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image">
|
||||
<div class="chunk-image-container">
|
||||
<div class="chunk-image-header">
|
||||
<span>Page <span x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"></span></span>
|
||||
</div>
|
||||
<img
|
||||
:src="'data:image/png;base64,' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.highlighted_page_image"
|
||||
:alt="'Page ' + expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.page_number"
|
||||
class="chunk-highlighted-image"
|
||||
/>
|
||||
</div>
|
||||
</template>
|
||||
<!-- Text context -->
|
||||
<template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_before">
|
||||
<template x-if="expandedChunks[`${result.doc_type}_${result.id}`]?.has_more_before">
|
||||
<span class="chunk-ellipsis">...</span>
|
||||
</template>
|
||||
<span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.before_context"></span><span class="chunk-matched" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.chunk_text"></span><span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.after_context"></span><template x-if="expandedChunks[`${result.doc_type}_${result.id}_${result.chunk_start_offset || 0}`]?.has_more_after">
|
||||
<span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}`]?.before_context"></span><span class="chunk-matched" x-text="expandedChunks[`${result.doc_type}_${result.id}`]?.chunk_text"></span><span class="chunk-text" x-text="expandedChunks[`${result.doc_type}_${result.id}`]?.after_context"></span><template x-if="expandedChunks[`${result.doc_type}_${result.id}`]?.has_more_after">
|
||||
<span class="chunk-ellipsis">...</span>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
@@ -21,6 +21,7 @@ from typing import Dict, Optional, Tuple
|
||||
import anyio
|
||||
import httpx
|
||||
import jwt
|
||||
from cryptography.fernet import Fernet
|
||||
|
||||
from nextcloud_mcp_server.auth.storage import RefreshTokenStorage
|
||||
from nextcloud_mcp_server.auth.token_exchange import exchange_token_for_delegation
|
||||
@@ -103,8 +104,7 @@ class TokenBrokerService:
|
||||
storage: RefreshTokenStorage,
|
||||
oidc_discovery_url: str,
|
||||
nextcloud_host: str,
|
||||
client_id: str,
|
||||
client_secret: str,
|
||||
encryption_key: str,
|
||||
cache_ttl: int = 300,
|
||||
cache_early_refresh: int = 30,
|
||||
):
|
||||
@@ -112,25 +112,23 @@ class TokenBrokerService:
|
||||
Initialize the Token Broker Service.
|
||||
|
||||
Args:
|
||||
storage: Database storage for refresh tokens (handles encryption internally)
|
||||
storage: Database storage for refresh tokens
|
||||
oidc_discovery_url: OIDC provider discovery URL
|
||||
nextcloud_host: Nextcloud server URL
|
||||
client_id: OAuth client ID for token operations
|
||||
client_secret: OAuth client secret for token operations
|
||||
encryption_key: Fernet key for token encryption
|
||||
cache_ttl: Cache TTL in seconds (default: 5 minutes)
|
||||
cache_early_refresh: Early refresh threshold in seconds (default: 30 seconds)
|
||||
"""
|
||||
self.storage = storage
|
||||
self.oidc_discovery_url = oidc_discovery_url
|
||||
self.nextcloud_host = nextcloud_host
|
||||
self.client_id = client_id
|
||||
self.client_secret = client_secret
|
||||
self.fernet = Fernet(
|
||||
encryption_key.encode()
|
||||
if isinstance(encryption_key, str)
|
||||
else encryption_key
|
||||
)
|
||||
self.cache = TokenCache(cache_ttl, cache_early_refresh)
|
||||
self._oidc_config = None
|
||||
|
||||
# Per-user locks for token refresh operations (prevents race conditions)
|
||||
self._user_refresh_locks: dict[str, anyio.Lock] = {}
|
||||
self._locks_lock = anyio.Lock() # Protects the locks dict itself
|
||||
self._http_client = None
|
||||
|
||||
async def _get_http_client(self) -> httpx.AsyncClient:
|
||||
@@ -141,24 +139,6 @@ class TokenBrokerService:
|
||||
)
|
||||
return self._http_client
|
||||
|
||||
async def _get_user_refresh_lock(self, user_id: str) -> anyio.Lock:
|
||||
"""
|
||||
Get or create a lock for a specific user's refresh operations.
|
||||
|
||||
This prevents race conditions when multiple concurrent requests
|
||||
attempt to refresh the same user's token simultaneously.
|
||||
|
||||
Args:
|
||||
user_id: User ID to get lock for
|
||||
|
||||
Returns:
|
||||
anyio.Lock for this user's refresh operations
|
||||
"""
|
||||
async with self._locks_lock:
|
||||
if user_id not in self._user_refresh_locks:
|
||||
self._user_refresh_locks[user_id] = anyio.Lock()
|
||||
return self._user_refresh_locks[user_id]
|
||||
|
||||
async def _get_oidc_config(self) -> dict:
|
||||
"""Get OIDC configuration from discovery endpoint."""
|
||||
if self._oidc_config is None:
|
||||
@@ -200,8 +180,9 @@ class TokenBrokerService:
|
||||
return None
|
||||
|
||||
try:
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
refresh_token = refresh_data["refresh_token"]
|
||||
# Decrypt refresh token
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
refresh_token = self.fernet.decrypt(encrypted_token.encode()).decode()
|
||||
|
||||
# Exchange refresh token for new access token
|
||||
access_token, expires_in = await self._refresh_access_token(refresh_token)
|
||||
@@ -290,79 +271,41 @@ class TokenBrokerService:
|
||||
"""
|
||||
# Check cache first (background tokens can be cached)
|
||||
cache_key = f"{user_id}:background:{','.join(sorted(required_scopes))}"
|
||||
refresh_in_progress_key = f"{user_id}:refresh_in_progress"
|
||||
|
||||
cached_token = await self.cache.get(cache_key)
|
||||
if cached_token:
|
||||
return cached_token
|
||||
|
||||
# Acquire per-user lock BEFORE refresh operation to prevent race conditions
|
||||
refresh_lock = await self._get_user_refresh_lock(user_id)
|
||||
async with refresh_lock:
|
||||
# Double-check cache after acquiring lock
|
||||
# (another thread may have refreshed while we waited)
|
||||
cached_token = await self.cache.get(cache_key)
|
||||
if cached_token:
|
||||
logger.debug(
|
||||
f"Token found in cache after lock acquisition for user {user_id}"
|
||||
)
|
||||
return cached_token
|
||||
# Get stored refresh token
|
||||
refresh_data = await self.storage.get_refresh_token(user_id)
|
||||
if not refresh_data:
|
||||
logger.info(f"No refresh token found for user {user_id}")
|
||||
return None
|
||||
|
||||
# Check if another thread is currently refreshing
|
||||
if await self.cache.get(refresh_in_progress_key):
|
||||
logger.debug(f"Refresh in progress for user {user_id}, waiting briefly")
|
||||
await anyio.sleep(0.1) # Brief wait for in-progress refresh
|
||||
# Check cache one more time after wait
|
||||
cached_token = await self.cache.get(cache_key)
|
||||
if cached_token:
|
||||
logger.debug(
|
||||
f"Token refreshed by another thread for user {user_id}"
|
||||
)
|
||||
return cached_token
|
||||
try:
|
||||
# Decrypt refresh token
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
refresh_token = self.fernet.decrypt(encrypted_token.encode()).decode()
|
||||
|
||||
# Mark refresh as in-progress
|
||||
await self.cache.set(refresh_in_progress_key, "true", expires_in=5)
|
||||
# Get token with specific scopes for background operation
|
||||
access_token, expires_in = await self._refresh_access_token_with_scopes(
|
||||
refresh_token, required_scopes
|
||||
)
|
||||
|
||||
try:
|
||||
# Get stored refresh token
|
||||
refresh_data = await self.storage.get_refresh_token(user_id)
|
||||
if not refresh_data:
|
||||
logger.info(f"No refresh token found for user {user_id}")
|
||||
return None
|
||||
# Cache the background token
|
||||
await self.cache.set(cache_key, access_token, expires_in)
|
||||
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
refresh_token = refresh_data["refresh_token"]
|
||||
logger.info(
|
||||
f"Generated background token for user {user_id} with scopes: {required_scopes}"
|
||||
)
|
||||
|
||||
# Get token with specific scopes for background operation
|
||||
# Pass user_id to enable refresh token rotation storage
|
||||
access_token, expires_in = await self._refresh_access_token_with_scopes(
|
||||
refresh_token, required_scopes, user_id=user_id
|
||||
)
|
||||
return access_token
|
||||
|
||||
# Cache the background token
|
||||
await self.cache.set(cache_key, access_token, expires_in)
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get background token for user {user_id}: {e}")
|
||||
await self.cache.invalidate(cache_key)
|
||||
return None
|
||||
|
||||
logger.info(
|
||||
f"Generated background token for user {user_id} with scopes: {required_scopes}"
|
||||
)
|
||||
|
||||
return access_token
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Failed to get background token for user {user_id}: {e}",
|
||||
exc_info=True,
|
||||
)
|
||||
await self.cache.invalidate(cache_key)
|
||||
return None
|
||||
|
||||
finally:
|
||||
# Always clear the in-progress marker
|
||||
await self.cache.invalidate(refresh_in_progress_key)
|
||||
|
||||
async def _refresh_access_token(
|
||||
self, refresh_token: str, user_id: str | None = None
|
||||
) -> Tuple[str, int]:
|
||||
async def _refresh_access_token(self, refresh_token: str) -> Tuple[str, int]:
|
||||
"""
|
||||
Exchange refresh token for new access token.
|
||||
|
||||
@@ -370,7 +313,6 @@ class TokenBrokerService:
|
||||
|
||||
Args:
|
||||
refresh_token: The refresh token
|
||||
user_id: If provided, store the rotated refresh token for this user
|
||||
|
||||
Returns:
|
||||
Tuple of (access_token, expires_in_seconds)
|
||||
@@ -381,13 +323,10 @@ class TokenBrokerService:
|
||||
client = await self._get_http_client()
|
||||
|
||||
# Request new access token using refresh token
|
||||
# Include client credentials as required by most OAuth servers
|
||||
data = {
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"scope": "openid profile email offline_access notes:read notes:write calendar:read calendar:write",
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
"scope": "openid profile email notes:read notes:write calendar:read calendar:write",
|
||||
}
|
||||
|
||||
response = await client.post(
|
||||
@@ -406,41 +345,23 @@ class TokenBrokerService:
|
||||
access_token = token_data["access_token"]
|
||||
expires_in = token_data.get("expires_in", 3600) # Default 1 hour
|
||||
|
||||
# Handle refresh token rotation (Nextcloud OIDC rotates on every use)
|
||||
new_refresh_token = token_data.get("refresh_token")
|
||||
if user_id and new_refresh_token and new_refresh_token != refresh_token:
|
||||
# Calculate expiry as Unix timestamp (90 days from now)
|
||||
expires_at = int(
|
||||
(datetime.now(timezone.utc) + timedelta(days=90)).timestamp()
|
||||
)
|
||||
await self.storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=new_refresh_token,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
logger.info(f"Stored rotated refresh token for user {user_id}")
|
||||
|
||||
# Note: Nextcloud validates token audience on API calls - no need to pre-validate here
|
||||
# Validate audience
|
||||
await self._validate_token_audience(access_token, "nextcloud")
|
||||
|
||||
logger.info(f"Refreshed access token (expires in {expires_in}s)")
|
||||
return access_token, expires_in
|
||||
|
||||
async def _refresh_access_token_with_scopes(
|
||||
self, refresh_token: str, required_scopes: list[str], user_id: str | None = None
|
||||
self, refresh_token: str, required_scopes: list[str]
|
||||
) -> Tuple[str, int]:
|
||||
"""
|
||||
Exchange refresh token for new access token with specific scopes.
|
||||
|
||||
This method implements scope downscoping for least privilege.
|
||||
|
||||
IMPORTANT: Nextcloud OIDC rotates refresh tokens on every use (one-time use).
|
||||
When user_id is provided, this method stores the new refresh token returned
|
||||
by Nextcloud to ensure subsequent refresh operations succeed.
|
||||
|
||||
Args:
|
||||
refresh_token: The refresh token
|
||||
required_scopes: Minimal scopes needed for this operation
|
||||
user_id: If provided, store the rotated refresh token for this user
|
||||
|
||||
Returns:
|
||||
Tuple of (access_token, expires_in_seconds)
|
||||
@@ -450,25 +371,16 @@ class TokenBrokerService:
|
||||
|
||||
client = await self._get_http_client()
|
||||
|
||||
# Always include basic OpenID scopes + offline_access to get new refresh token
|
||||
scopes = list(
|
||||
set(["openid", "profile", "email", "offline_access"] + required_scopes)
|
||||
)
|
||||
# Always include basic OpenID scopes
|
||||
scopes = list(set(["openid", "profile", "email"] + required_scopes))
|
||||
|
||||
# Request new access token with specific scopes
|
||||
# Include client credentials as required by most OAuth servers
|
||||
data = {
|
||||
"grant_type": "refresh_token",
|
||||
"refresh_token": refresh_token,
|
||||
"scope": " ".join(scopes),
|
||||
"client_id": self.client_id,
|
||||
"client_secret": self.client_secret,
|
||||
}
|
||||
|
||||
logger.info(
|
||||
f"Token refresh request to {token_endpoint} with client_id={self.client_id[:16]}..."
|
||||
)
|
||||
|
||||
response = await client.post(
|
||||
token_endpoint,
|
||||
data=data,
|
||||
@@ -479,29 +391,14 @@ class TokenBrokerService:
|
||||
logger.error(
|
||||
f"Token refresh with scopes failed: {response.status_code} - {response.text}"
|
||||
)
|
||||
logger.error(f" client_id used: {self.client_id[:16]}...")
|
||||
raise Exception(f"Token refresh failed: {response.status_code}")
|
||||
|
||||
token_data = response.json()
|
||||
access_token = token_data["access_token"]
|
||||
expires_in = token_data.get("expires_in", 3600) # Default 1 hour
|
||||
|
||||
# Handle refresh token rotation (Nextcloud OIDC rotates on every use)
|
||||
new_refresh_token = token_data.get("refresh_token")
|
||||
if user_id and new_refresh_token and new_refresh_token != refresh_token:
|
||||
# Store the new refresh token for future use
|
||||
# Calculate expiry as Unix timestamp (90 days from now)
|
||||
expires_at = int(
|
||||
(datetime.now(timezone.utc) + timedelta(days=90)).timestamp()
|
||||
)
|
||||
await self.storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=new_refresh_token,
|
||||
expires_at=expires_at,
|
||||
)
|
||||
logger.info(f"Stored rotated refresh token for user {user_id}")
|
||||
|
||||
# Note: Nextcloud validates token audience on API calls - no need to pre-validate here
|
||||
# Validate audience
|
||||
await self._validate_token_audience(access_token, "nextcloud")
|
||||
|
||||
logger.info(
|
||||
f"Refreshed access token with scopes {scopes} (expires in {expires_in}s)"
|
||||
@@ -556,8 +453,11 @@ class TokenBrokerService:
|
||||
return False
|
||||
|
||||
try:
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
current_refresh_token = refresh_data["refresh_token"]
|
||||
# Decrypt current refresh token
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
current_refresh_token = self.fernet.decrypt(
|
||||
encrypted_token.encode()
|
||||
).decode()
|
||||
|
||||
# Get OIDC configuration
|
||||
config = await self._get_oidc_config()
|
||||
@@ -586,15 +486,13 @@ class TokenBrokerService:
|
||||
new_refresh_token = token_data.get("refresh_token")
|
||||
|
||||
if new_refresh_token and new_refresh_token != current_refresh_token:
|
||||
# storage.store_refresh_token() handles encryption internally
|
||||
# Convert datetime to Unix timestamp (int) for database storage
|
||||
expires_at = int(
|
||||
(datetime.now(timezone.utc) + timedelta(days=90)).timestamp()
|
||||
)
|
||||
# Encrypt and store new refresh token
|
||||
encrypted_new = self.fernet.encrypt(new_refresh_token.encode()).decode()
|
||||
await self.storage.store_refresh_token(
|
||||
user_id=user_id,
|
||||
refresh_token=new_refresh_token,
|
||||
expires_at=expires_at,
|
||||
refresh_token=encrypted_new,
|
||||
expires_at=datetime.now(timezone.utc)
|
||||
+ timedelta(days=90), # 90-day expiry
|
||||
)
|
||||
logger.info(f"Rotated master refresh token for user {user_id}")
|
||||
|
||||
@@ -638,8 +536,11 @@ class TokenBrokerService:
|
||||
refresh_data = await self.storage.get_refresh_token(user_id)
|
||||
if refresh_data:
|
||||
try:
|
||||
# storage.get_refresh_token() returns already-decrypted token
|
||||
refresh_token = refresh_data["refresh_token"]
|
||||
# Attempt to revoke at IdP
|
||||
encrypted_token = refresh_data["refresh_token"]
|
||||
refresh_token = self.fernet.decrypt(
|
||||
encrypted_token.encode()
|
||||
).decode()
|
||||
await self._revoke_token_at_idp(refresh_token)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to revoke at IdP: {e}")
|
||||
|
||||
@@ -117,71 +117,6 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
# Both modes do the same validation (MCP audience only)
|
||||
return await self._verify_mcp_audience(token)
|
||||
|
||||
async def verify_token_for_management_api(self, token: str) -> AccessToken | None:
|
||||
"""
|
||||
Verify token for management API access (ADR-018 NC PHP app integration).
|
||||
|
||||
This verification accepts ANY valid Nextcloud OIDC token, not just tokens
|
||||
with MCP server audience. This is needed because:
|
||||
- Astrolabe (NC PHP app) uses its own OAuth client with Nextcloud OIDC
|
||||
- Tokens from Astrolabe have Astrolabe's client_id as audience
|
||||
- MCP server's management API should accept these tokens
|
||||
|
||||
Security Model:
|
||||
~~~~~~~~~~~~~~~~
|
||||
This relaxed audience validation is secure because:
|
||||
|
||||
1. **Authentication layer** (this method):
|
||||
- Verifies token signature against Nextcloud's JWKS (cryptographic proof)
|
||||
- Verifies token is not expired
|
||||
- Extracts user identity from validated token claims
|
||||
|
||||
2. **Authorization layer** (management API endpoints):
|
||||
- EVERY endpoint verifies: token.sub == requested_resource_owner
|
||||
- Example: GET /users/{user_id}/session checks token_user_id == path_user_id
|
||||
- Users can ONLY access their own resources, never another user's
|
||||
|
||||
3. **Attack scenario analysis**:
|
||||
- Attacker with stolen token for App A cannot access user B's data
|
||||
- Token's `sub` claim is cryptographically bound to a specific user
|
||||
- Authorization layer rejects cross-user access attempts (403 Forbidden)
|
||||
|
||||
4. **Why audience validation isn't needed here**:
|
||||
- Audience validation prevents token confusion attacks across services
|
||||
- But management API authorization already gates access per-user
|
||||
- A token valid for "astrolabe" is still bound to user X, not user Y
|
||||
|
||||
Args:
|
||||
token: Bearer token to verify
|
||||
|
||||
Returns:
|
||||
AccessToken if valid (regardless of audience), None otherwise
|
||||
"""
|
||||
# Check cache first (using separate cache key to avoid mixing with MCP tokens)
|
||||
cache_key = f"mgmt:{hashlib.sha256(token.encode()).hexdigest()}"
|
||||
if cache_key in self._token_cache:
|
||||
userinfo, expiry = self._token_cache[cache_key]
|
||||
if time.time() < expiry:
|
||||
logger.debug("Management API token found in cache")
|
||||
oauth_token_cache_hits_total.labels(hit="true").inc()
|
||||
username = userinfo.get("sub") or userinfo.get("preferred_username")
|
||||
scope_string = userinfo.get("scope", "")
|
||||
scopes = scope_string.split() if scope_string else []
|
||||
return AccessToken(
|
||||
token=token,
|
||||
client_id=userinfo.get("client_id", ""),
|
||||
scopes=scopes,
|
||||
expires_at=int(expiry),
|
||||
resource=username,
|
||||
)
|
||||
else:
|
||||
del self._token_cache[cache_key]
|
||||
|
||||
oauth_token_cache_hits_total.labels(hit="false").inc()
|
||||
|
||||
# Verify token without audience check
|
||||
return await self._verify_without_audience_check(token, cache_key)
|
||||
|
||||
async def _verify_mcp_audience(self, token: str) -> AccessToken | None:
|
||||
"""
|
||||
Validate token has MCP audience.
|
||||
@@ -251,78 +186,6 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
record_oauth_token_validation(validation_method, "error")
|
||||
return None
|
||||
|
||||
async def _verify_without_audience_check(
|
||||
self, token: str, cache_key: str
|
||||
) -> AccessToken | None:
|
||||
"""
|
||||
Verify token validity without checking MCP audience or issuer.
|
||||
|
||||
Used for management API where tokens from Astrolabe (NC PHP app) need to
|
||||
be accepted. These tokens are issued by Nextcloud OIDC to Astrolabe's
|
||||
OAuth client, not MCP server's client.
|
||||
|
||||
What we verify:
|
||||
- ✓ Token signature (cryptographic proof token is from Nextcloud OIDC)
|
||||
- ✓ Token expiration (not expired)
|
||||
- ✓ Token structure (valid JWT format)
|
||||
|
||||
What we skip:
|
||||
- ✗ Audience check (token may have Astrolabe's audience, not MCP's)
|
||||
- ✗ Issuer check (token may have internal Nextcloud URL as issuer)
|
||||
|
||||
Security guarantee:
|
||||
- Authorization is enforced by management API endpoints
|
||||
- Each endpoint verifies: token.sub == requested_resource_owner
|
||||
- See verify_token_for_management_api() docstring for full security model
|
||||
|
||||
Args:
|
||||
token: Bearer token to verify
|
||||
cache_key: Cache key for storing validation result
|
||||
|
||||
Returns:
|
||||
AccessToken if valid, None otherwise
|
||||
"""
|
||||
validation_method = "unknown"
|
||||
try:
|
||||
# Attempt JWT verification first
|
||||
# Skip issuer check for management API tokens (may have internal URL)
|
||||
if self._is_jwt_format(token) and self.jwks_client:
|
||||
validation_method = "jwt"
|
||||
payload = await self._verify_jwt_signature(
|
||||
token, skip_issuer_check=True
|
||||
)
|
||||
if payload:
|
||||
record_oauth_token_validation("jwt", "valid")
|
||||
else:
|
||||
record_oauth_token_validation("jwt", "invalid")
|
||||
return None
|
||||
else:
|
||||
# Fall back to introspection for opaque tokens
|
||||
validation_method = "introspect"
|
||||
payload = await self._introspect_token(token)
|
||||
if payload:
|
||||
record_oauth_token_validation("introspect", "valid")
|
||||
else:
|
||||
record_oauth_token_validation("introspect", "invalid")
|
||||
return None
|
||||
|
||||
# Check payload is valid
|
||||
if not payload:
|
||||
return None
|
||||
|
||||
# Skip audience validation - any valid Nextcloud token is accepted
|
||||
logger.debug(
|
||||
f"Management API token validated (no audience check) for user: {payload.get('sub')}"
|
||||
)
|
||||
|
||||
# Cache and return the token
|
||||
return self._create_access_token_with_cache_key(token, payload, cache_key)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Management API token verification failed: {e}")
|
||||
record_oauth_token_validation(validation_method, "error")
|
||||
return None
|
||||
|
||||
def _has_mcp_audience(self, payload: dict[str, Any]) -> bool:
|
||||
"""
|
||||
Check if token has MCP audience.
|
||||
@@ -367,15 +230,12 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
"""
|
||||
return "." in token and token.count(".") == 2
|
||||
|
||||
async def _verify_jwt_signature(
|
||||
self, token: str, skip_issuer_check: bool = False
|
||||
) -> dict[str, Any] | None:
|
||||
async def _verify_jwt_signature(self, token: str) -> dict[str, Any] | None:
|
||||
"""
|
||||
Verify JWT token with signature validation using JWKS.
|
||||
|
||||
Args:
|
||||
token: JWT token to verify
|
||||
skip_issuer_check: If True, skip issuer validation (for management API tokens)
|
||||
|
||||
Returns:
|
||||
Decoded payload if valid, None if invalid
|
||||
@@ -388,22 +248,25 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
|
||||
# Verify and decode JWT
|
||||
# Note: We don't validate audience here - that's done separately based on mode
|
||||
# Issuer validation can be skipped for management API tokens (from Astrolabe)
|
||||
should_verify_issuer = (
|
||||
not skip_issuer_check
|
||||
and hasattr(self.settings, "oidc_issuer")
|
||||
and self.settings.oidc_issuer
|
||||
)
|
||||
payload = jwt.decode(
|
||||
token,
|
||||
signing_key.key,
|
||||
algorithms=["RS256"],
|
||||
issuer=(self.settings.oidc_issuer if should_verify_issuer else None),
|
||||
issuer=(
|
||||
self.settings.oidc_issuer
|
||||
if hasattr(self.settings, "oidc_issuer")
|
||||
else None
|
||||
),
|
||||
options={
|
||||
"verify_signature": True,
|
||||
"verify_exp": True,
|
||||
"verify_iat": True,
|
||||
"verify_iss": should_verify_issuer,
|
||||
"verify_iss": (
|
||||
True
|
||||
if hasattr(self.settings, "oidc_issuer")
|
||||
and self.settings.oidc_issuer
|
||||
else False
|
||||
),
|
||||
"verify_aud": False, # We handle audience validation separately
|
||||
},
|
||||
)
|
||||
@@ -440,13 +303,10 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
|
||||
try:
|
||||
# Introspection requires client authentication
|
||||
client_id = self.settings.oidc_client_id
|
||||
client_secret = self.settings.oidc_client_secret
|
||||
assert client_id is not None and client_secret is not None
|
||||
response = await self.http_client.post(
|
||||
self.introspection_uri,
|
||||
data={"token": token},
|
||||
auth=(client_id, client_secret),
|
||||
auth=(self.settings.oidc_client_id, self.settings.oidc_client_secret),
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
@@ -495,24 +355,6 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
token: The bearer token
|
||||
payload: Validated token payload
|
||||
|
||||
Returns:
|
||||
AccessToken object or None if required fields missing
|
||||
"""
|
||||
# Use default cache key (hash of token)
|
||||
cache_key = hashlib.sha256(token.encode()).hexdigest()
|
||||
return self._create_access_token_with_cache_key(token, payload, cache_key)
|
||||
|
||||
def _create_access_token_with_cache_key(
|
||||
self, token: str, payload: dict[str, Any], cache_key: str
|
||||
) -> AccessToken | None:
|
||||
"""
|
||||
Create AccessToken object from validated token payload with custom cache key.
|
||||
|
||||
Args:
|
||||
token: The bearer token
|
||||
payload: Validated token payload
|
||||
cache_key: Key to use for caching (allows separate caches for MCP vs management API)
|
||||
|
||||
Returns:
|
||||
AccessToken object or None if required fields missing
|
||||
"""
|
||||
@@ -537,13 +379,14 @@ class UnifiedTokenVerifier(TokenVerifier):
|
||||
logger.warning("No 'exp' claim in token, using default TTL")
|
||||
exp = int(time.time() + self.cache_ttl)
|
||||
|
||||
# Cache the result with the provided key
|
||||
# Cache the result
|
||||
token_hash = hashlib.sha256(token.encode()).hexdigest()
|
||||
userinfo = {
|
||||
"sub": username,
|
||||
"scope": scope_string,
|
||||
**{k: v for k, v in payload.items() if k not in ["sub", "scope"]},
|
||||
}
|
||||
self._token_cache[cache_key] = (userinfo, exp)
|
||||
self._token_cache[token_hash] = (userinfo, exp)
|
||||
|
||||
return AccessToken(
|
||||
token=token,
|
||||
|
||||
@@ -18,8 +18,6 @@ from starlette.authentication import requires
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import HTMLResponse, JSONResponse
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Setup Jinja2 environment for templates
|
||||
@@ -27,20 +25,14 @@ _template_dir = Path(__file__).parent / "templates"
|
||||
_jinja_env = Environment(loader=FileSystemLoader(_template_dir))
|
||||
|
||||
|
||||
async def _get_authenticated_client_for_userinfo(request: Request) -> NextcloudClient:
|
||||
"""Get an authenticated Nextcloud client for user info page operations.
|
||||
|
||||
This is a shared helper for authenticated routes that need to access
|
||||
Nextcloud APIs. It handles both BasicAuth and OAuth authentication modes.
|
||||
async def _get_authenticated_client_for_userinfo(request: Request) -> httpx.AsyncClient:
|
||||
"""Get an authenticated HTTP client for user info page operations.
|
||||
|
||||
Args:
|
||||
request: Starlette request object
|
||||
|
||||
Returns:
|
||||
Authenticated NextcloudClient
|
||||
|
||||
Raises:
|
||||
RuntimeError: If credentials/session not configured
|
||||
Authenticated httpx.AsyncClient
|
||||
"""
|
||||
oauth_ctx = getattr(request.app.state, "oauth_context", None)
|
||||
|
||||
@@ -53,15 +45,11 @@ async def _get_authenticated_client_for_userinfo(request: Request) -> NextcloudC
|
||||
if not all([nextcloud_host, username, password]):
|
||||
raise RuntimeError("BasicAuth credentials not configured")
|
||||
|
||||
from httpx import BasicAuth
|
||||
|
||||
assert nextcloud_host is not None
|
||||
assert username is not None
|
||||
assert password is not None
|
||||
return NextcloudClient(
|
||||
assert nextcloud_host is not None # Type narrowing for type checker
|
||||
return httpx.AsyncClient(
|
||||
base_url=nextcloud_host,
|
||||
username=username,
|
||||
auth=BasicAuth(username, password),
|
||||
auth=(username, password),
|
||||
timeout=30.0,
|
||||
)
|
||||
|
||||
# OAuth mode - get token from session
|
||||
@@ -76,14 +64,15 @@ async def _get_authenticated_client_for_userinfo(request: Request) -> NextcloudC
|
||||
raise RuntimeError("No access token found in session")
|
||||
|
||||
access_token = token_data["access_token"]
|
||||
username = token_data.get("username")
|
||||
nextcloud_host = oauth_ctx.get("config", {}).get("nextcloud_host", "")
|
||||
|
||||
if not nextcloud_host or not username:
|
||||
raise RuntimeError("Nextcloud host or username not configured")
|
||||
if not nextcloud_host:
|
||||
raise RuntimeError("Nextcloud host not configured")
|
||||
|
||||
return NextcloudClient.from_token(
|
||||
base_url=nextcloud_host, token=access_token, username=username
|
||||
return httpx.AsyncClient(
|
||||
base_url=nextcloud_host,
|
||||
headers={"Authorization": f"Bearer {access_token}"},
|
||||
timeout=30.0,
|
||||
)
|
||||
|
||||
|
||||
@@ -434,10 +423,10 @@ async def user_info_html(request: Request) -> HTMLResponse:
|
||||
try:
|
||||
from nextcloud_mcp_server.auth.permissions import is_nextcloud_admin
|
||||
|
||||
# Get authenticated Nextcloud client
|
||||
nc_client = await _get_authenticated_client_for_userinfo(request)
|
||||
is_admin = await is_nextcloud_admin(request, nc_client._client)
|
||||
await nc_client.close()
|
||||
# Get authenticated HTTP client
|
||||
http_client = await _get_authenticated_client_for_userinfo(request)
|
||||
is_admin = await is_nextcloud_admin(request, http_client)
|
||||
await http_client.aclose()
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to check admin status: {e}")
|
||||
# Default to not admin if check fails
|
||||
|
||||
@@ -22,13 +22,11 @@ from starlette.requests import Request
|
||||
from starlette.responses import HTMLResponse, JSONResponse
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.observability.tracing import trace_operation
|
||||
from nextcloud_mcp_server.search import (
|
||||
BM25HybridSearchAlgorithm,
|
||||
SemanticSearchAlgorithm,
|
||||
)
|
||||
from nextcloud_mcp_server.vector.pca import PCA
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -140,10 +138,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
_get_authenticated_client_for_userinfo,
|
||||
)
|
||||
|
||||
with trace_operation("vector_viz.get_auth_client"):
|
||||
auth_client_ctx = await _get_authenticated_client_for_userinfo(request)
|
||||
|
||||
async with auth_client_ctx as nc_client: # noqa: F841
|
||||
async with await _get_authenticated_client_for_userinfo(request) as http_client: # noqa: F841
|
||||
# Create search algorithm (no client needed - verification removed)
|
||||
if algorithm == "semantic":
|
||||
search_algo = SemanticSearchAlgorithm(score_threshold=score_threshold)
|
||||
@@ -163,40 +158,24 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
all_results = []
|
||||
if doc_types is None or len(doc_types) == 0:
|
||||
# Cross-app search - search all indexed types
|
||||
with trace_operation(
|
||||
"vector_viz.search_execute",
|
||||
attributes={
|
||||
"search.algorithm": algorithm,
|
||||
"search.limit": limit * 2,
|
||||
"search.doc_type": "all",
|
||||
},
|
||||
):
|
||||
unverified_results = await search_algo.search(
|
||||
query=query,
|
||||
user_id=username,
|
||||
limit=limit * 2, # Buffer for verification filtering
|
||||
doc_type=None, # Search all types
|
||||
score_threshold=score_threshold,
|
||||
)
|
||||
unverified_results = await search_algo.search(
|
||||
query=query,
|
||||
user_id=username,
|
||||
limit=limit * 2, # Buffer for verification filtering
|
||||
doc_type=None, # Search all types
|
||||
score_threshold=score_threshold,
|
||||
)
|
||||
all_results.extend(unverified_results)
|
||||
else:
|
||||
# Search each document type and combine
|
||||
for doc_type in doc_types:
|
||||
with trace_operation(
|
||||
"vector_viz.search_execute",
|
||||
attributes={
|
||||
"search.algorithm": algorithm,
|
||||
"search.limit": limit * 2,
|
||||
"search.doc_type": doc_type,
|
||||
},
|
||||
):
|
||||
unverified_results = await search_algo.search(
|
||||
query=query,
|
||||
user_id=username,
|
||||
limit=limit * 2, # Buffer for verification filtering
|
||||
doc_type=doc_type,
|
||||
score_threshold=score_threshold,
|
||||
)
|
||||
unverified_results = await search_algo.search(
|
||||
query=query,
|
||||
user_id=username,
|
||||
limit=limit * 2, # Buffer for verification filtering
|
||||
doc_type=doc_type,
|
||||
score_threshold=score_threshold,
|
||||
)
|
||||
all_results.extend(unverified_results)
|
||||
# Sort by score before verification
|
||||
all_results.sort(key=lambda r: r.score, reverse=True)
|
||||
@@ -210,26 +189,22 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
# Store original scores and normalize for visualization
|
||||
# (best result = 1.0, worst result = 0.0 within THIS result set)
|
||||
# This makes visual encoding meaningful regardless of RRF normalization
|
||||
with trace_operation(
|
||||
"vector_viz.score_normalize",
|
||||
attributes={"normalize.num_results": len(search_results)},
|
||||
):
|
||||
if search_results:
|
||||
scores = [r.score for r in search_results]
|
||||
min_score, max_score = min(scores), max(scores)
|
||||
score_range = max_score - min_score if max_score > min_score else 1.0
|
||||
if search_results:
|
||||
scores = [r.score for r in search_results]
|
||||
min_score, max_score = min(scores), max(scores)
|
||||
score_range = max_score - min_score if max_score > min_score else 1.0
|
||||
|
||||
logger.info(
|
||||
f"Normalizing scores for viz: original range [{min_score:.3f}, {max_score:.3f}] "
|
||||
f"→ [0.0, 1.0]"
|
||||
)
|
||||
logger.info(
|
||||
f"Normalizing scores for viz: original range [{min_score:.3f}, {max_score:.3f}] "
|
||||
f"→ [0.0, 1.0]"
|
||||
)
|
||||
|
||||
# Store original score and rescale to 0-1 for visualization
|
||||
for r in search_results:
|
||||
# Store original score before normalization
|
||||
r.original_score = r.score
|
||||
# Rescale for visual encoding
|
||||
r.score = (r.score - min_score) / score_range
|
||||
# Store original score and rescale to 0-1 for visualization
|
||||
for r in search_results:
|
||||
# Store original score before normalization
|
||||
r.original_score = r.score
|
||||
# Rescale for visual encoding
|
||||
r.score = (r.score - min_score) / score_range
|
||||
|
||||
if not search_results:
|
||||
return JSONResponse(
|
||||
@@ -237,57 +212,75 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
"success": True,
|
||||
"results": [],
|
||||
"coordinates_3d": [],
|
||||
"query_coords": [],
|
||||
"query_coords": None,
|
||||
"message": "No results found",
|
||||
}
|
||||
)
|
||||
|
||||
# Fetch vectors for specific matching chunks from Qdrant using batch retrieve
|
||||
# Fetch vectors for matching results from Qdrant
|
||||
vector_fetch_start = time.perf_counter()
|
||||
qdrant_client = await get_qdrant_client()
|
||||
doc_ids = [r.id for r in search_results]
|
||||
|
||||
with trace_operation("vector_viz.get_qdrant_client"):
|
||||
qdrant_client = await get_qdrant_client()
|
||||
# Retrieve vectors for the matching documents
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchAny
|
||||
|
||||
chunk_vectors_map = {} # Map (doc_id, chunk_start, chunk_end) -> vector
|
||||
points_response = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(
|
||||
key="doc_id",
|
||||
match=MatchAny(any=[str(doc_id) for doc_id in doc_ids]),
|
||||
),
|
||||
FieldCondition(
|
||||
key="user_id",
|
||||
match={"value": username},
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=len(doc_ids) * 2, # Account for multiple chunks per doc
|
||||
with_vectors=["dense"], # Only fetch dense vectors for visualization
|
||||
with_payload=["doc_id"], # Need doc_id to map vectors to results
|
||||
)
|
||||
|
||||
# Collect point IDs from search results for batch retrieval
|
||||
# point_id is the Qdrant internal ID returned by search algorithms
|
||||
point_ids = [r.point_id for r in search_results if r.point_id]
|
||||
points = points_response[0]
|
||||
|
||||
if point_ids:
|
||||
# Single batch retrieve call instead of N sequential scroll calls
|
||||
# This is ~50x faster for 50 results (1 HTTP request vs 50)
|
||||
with trace_operation(
|
||||
"vector_viz.vector_retrieve",
|
||||
attributes={"retrieve.num_points": len(point_ids)},
|
||||
):
|
||||
points_response = await qdrant_client.retrieve(
|
||||
collection_name=settings.get_collection_name(),
|
||||
ids=point_ids,
|
||||
with_vectors=["dense"],
|
||||
with_payload=["doc_id", "chunk_start_offset", "chunk_end_offset"],
|
||||
)
|
||||
if not points:
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": True,
|
||||
"results": [],
|
||||
"coordinates_2d": [],
|
||||
"message": "No vectors found for results",
|
||||
}
|
||||
)
|
||||
|
||||
# Build chunk_vectors_map from batch response
|
||||
for point in points_response:
|
||||
if point.vector is not None:
|
||||
# Extract dense vector (handle both named and unnamed vectors)
|
||||
if isinstance(point.vector, dict):
|
||||
vector = point.vector.get("dense")
|
||||
else:
|
||||
vector = point.vector
|
||||
# Extract dense vectors and group by document
|
||||
def extract_dense_vector(point):
|
||||
if point.vector is None:
|
||||
return None
|
||||
# If named vectors (dict), extract "dense"
|
||||
if isinstance(point.vector, dict):
|
||||
return point.vector.get("dense")
|
||||
# If unnamed vector (array), use directly
|
||||
return point.vector
|
||||
|
||||
if vector is not None and point.payload:
|
||||
doc_id = point.payload.get("doc_id")
|
||||
chunk_start = point.payload.get("chunk_start_offset")
|
||||
chunk_end = point.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, chunk_start, chunk_end)
|
||||
chunk_vectors_map[chunk_key] = vector
|
||||
# Group chunk vectors by doc_id
|
||||
from collections import defaultdict
|
||||
|
||||
doc_chunks = defaultdict(list)
|
||||
for point in points:
|
||||
if point.payload:
|
||||
doc_id = int(point.payload.get("doc_id", 0))
|
||||
vector = extract_dense_vector(point)
|
||||
if vector is not None:
|
||||
doc_chunks[doc_id].append(vector)
|
||||
|
||||
vector_fetch_duration = time.perf_counter() - vector_fetch_start
|
||||
|
||||
if len(chunk_vectors_map) < 2:
|
||||
# Not enough chunks for PCA
|
||||
if len(doc_chunks) < 2:
|
||||
# Not enough documents for PCA
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": True,
|
||||
@@ -298,21 +291,20 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
"title": r.title,
|
||||
"excerpt": r.excerpt,
|
||||
"score": r.score,
|
||||
"metadata": r.metadata,
|
||||
}
|
||||
for r in search_results
|
||||
],
|
||||
"coordinates_3d": [[0, 0, 0]] * len(search_results),
|
||||
"query_coords": [0, 0, 0],
|
||||
"message": "Not enough chunks for PCA",
|
||||
"message": "Not enough documents for PCA",
|
||||
}
|
||||
)
|
||||
|
||||
# Detect embedding dimension from first available vector
|
||||
embedding_dim = None
|
||||
for vector in chunk_vectors_map.values():
|
||||
if vector is not None:
|
||||
embedding_dim = len(vector)
|
||||
for chunks in doc_chunks.values():
|
||||
if chunks:
|
||||
embedding_dim = len(chunks[0])
|
||||
break
|
||||
|
||||
if embedding_dim is None:
|
||||
@@ -326,42 +318,37 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
|
||||
logger.info(f"Detected embedding dimension: {embedding_dim}")
|
||||
|
||||
# Build chunk vectors array in search_results order (1:1 mapping)
|
||||
chunk_vectors = []
|
||||
# Average chunk vectors per document to create document-level embeddings
|
||||
# Maintain order of search_results for coordinate mapping
|
||||
doc_vectors = []
|
||||
for result in search_results:
|
||||
chunk_key = (result.id, result.chunk_start_offset, result.chunk_end_offset)
|
||||
if chunk_key in chunk_vectors_map:
|
||||
chunk_vectors.append(chunk_vectors_map[chunk_key])
|
||||
if result.id in doc_chunks:
|
||||
# Average all chunk embeddings for this document
|
||||
chunk_vectors = np.array(doc_chunks[result.id])
|
||||
avg_vector = np.mean(chunk_vectors, axis=0)
|
||||
doc_vectors.append(avg_vector)
|
||||
logger.debug(f"Doc {result.id}: averaged {len(chunk_vectors)} chunks")
|
||||
else:
|
||||
# Chunk not found in vectors (shouldn't happen)
|
||||
logger.warning(
|
||||
f"Chunk {chunk_key} not found in fetched vectors, using zero vector"
|
||||
)
|
||||
# Use zero vector as fallback
|
||||
chunk_vectors.append(np.zeros(embedding_dim))
|
||||
# Document not found in vectors (shouldn't happen)
|
||||
logger.warning(f"Doc {result.id} not found in fetched vectors")
|
||||
# Use zero vector as fallback with detected dimension
|
||||
doc_vectors.append(np.zeros(embedding_dim))
|
||||
|
||||
chunk_vectors = np.array(chunk_vectors)
|
||||
doc_vectors = np.array(doc_vectors)
|
||||
|
||||
# Reuse query embedding from search algorithm (avoids redundant embedding call)
|
||||
# Generate query embedding for visualization
|
||||
query_embed_start = time.perf_counter()
|
||||
if search_algo.query_embedding is not None:
|
||||
query_embedding = search_algo.query_embedding
|
||||
logger.info(
|
||||
f"Reusing query embedding from search algorithm "
|
||||
f"(dimension={len(query_embedding)})"
|
||||
)
|
||||
else:
|
||||
# Fallback: generate embedding if not available from search
|
||||
from nextcloud_mcp_server.embedding.service import get_embedding_service
|
||||
from nextcloud_mcp_server.embedding.service import get_embedding_service
|
||||
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
query_embed_duration = time.perf_counter() - query_embed_start
|
||||
|
||||
# Combine query vector with chunk vectors for PCA
|
||||
logger.info(f"Generated query embedding (dimension={len(query_embedding)})")
|
||||
|
||||
# Combine query vector with document vectors for PCA
|
||||
# Query will be the last point in the array
|
||||
all_vectors = np.vstack([chunk_vectors, np.array([query_embedding])])
|
||||
all_vectors = np.vstack([doc_vectors, np.array([query_embedding])])
|
||||
|
||||
# Normalize vectors to unit length (L2 normalization)
|
||||
# This is critical because Qdrant uses COSINE distance, which only measures
|
||||
@@ -388,26 +375,9 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
)
|
||||
|
||||
# Apply PCA dimensionality reduction (768-dim → 3D) on normalized vectors
|
||||
# Run in thread pool to avoid blocking the event loop (CPU-bound)
|
||||
pca_start = time.perf_counter()
|
||||
|
||||
def _compute_pca(vectors: np.ndarray) -> tuple[np.ndarray, PCA]:
|
||||
pca = PCA(n_components=3)
|
||||
coords = pca.fit_transform(vectors)
|
||||
return coords, pca
|
||||
|
||||
import anyio
|
||||
|
||||
with trace_operation(
|
||||
"vector_viz.pca_compute",
|
||||
attributes={
|
||||
"pca.num_vectors": len(all_vectors_normalized),
|
||||
"pca.embedding_dim": embedding_dim,
|
||||
},
|
||||
):
|
||||
coords_3d, pca = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: _compute_pca(all_vectors_normalized)
|
||||
)
|
||||
pca = PCA(n_components=3)
|
||||
coords_3d = pca.fit_transform(all_vectors_normalized)
|
||||
pca_duration = time.perf_counter() - pca_start
|
||||
|
||||
# After fit, these attributes are guaranteed to be set
|
||||
@@ -424,12 +394,17 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
# Replace NaN with 0 to allow JSON serialization
|
||||
coords_3d = np.nan_to_num(coords_3d, nan=0.0)
|
||||
|
||||
# Split query coords from chunk coords
|
||||
# Split query coords from document coords
|
||||
# Round to 2 decimal places for cleaner display
|
||||
query_coords_3d = [
|
||||
round(float(x), 2) for x in coords_3d[-1]
|
||||
] # Last point is query
|
||||
chunk_coords_3d = coords_3d[:-1] # All but last are chunks
|
||||
doc_coords_3d = coords_3d[:-1] # All but last are documents
|
||||
|
||||
total_chunks = sum(len(chunks) for chunks in doc_chunks.values())
|
||||
avg_chunks_per_doc = (
|
||||
total_chunks / len(doc_vectors) if doc_vectors.size > 0 else 0
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"PCA explained variance: PC1={pca.explained_variance_ratio_[0]:.3f}, "
|
||||
@@ -437,14 +412,13 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
f"PC3={pca.explained_variance_ratio_[2]:.3f}"
|
||||
)
|
||||
logger.info(
|
||||
f"Embedding stats: chunks={len(chunk_vectors)}, "
|
||||
f"query_dim={len(query_embedding)}, chunk_vector_dim={chunk_vectors.shape[1] if chunk_vectors.size > 0 else 0}"
|
||||
f"Embedding stats: documents={len(doc_vectors)}, "
|
||||
f"total_chunks={total_chunks}, avg_chunks_per_doc={avg_chunks_per_doc:.1f}, "
|
||||
f"query_dim={len(query_embedding)}, doc_vector_dim={doc_vectors.shape[1] if doc_vectors.size > 0 else 0}"
|
||||
)
|
||||
|
||||
# Coordinates already match search_results order (1:1 mapping)
|
||||
result_coords = [
|
||||
[round(float(x), 2) for x in coord] for coord in chunk_coords_3d
|
||||
]
|
||||
result_coords = [[round(float(x), 2) for x in coord] for coord in doc_coords_3d]
|
||||
|
||||
# Build response
|
||||
response_results = [
|
||||
@@ -459,7 +433,6 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
), # Raw score from algorithm
|
||||
"chunk_start_offset": r.chunk_start_offset,
|
||||
"chunk_end_offset": r.chunk_end_offset,
|
||||
"metadata": r.metadata, # Include metadata (e.g., board_id for deck_card)
|
||||
}
|
||||
for r in search_results
|
||||
]
|
||||
@@ -474,7 +447,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
f"vector_fetch={vector_fetch_duration * 1000:.1f}ms ({vector_fetch_duration / total_duration * 100:.1f}%), "
|
||||
f"query_embed={query_embed_duration * 1000:.1f}ms ({query_embed_duration / total_duration * 100:.1f}%), "
|
||||
f"pca={pca_duration * 1000:.1f}ms ({pca_duration / total_duration * 100:.1f}%), "
|
||||
f"results={len(search_results)}, chunk_vectors={len(chunk_vectors)}"
|
||||
f"results={len(search_results)}, doc_vectors={len(doc_vectors)}"
|
||||
)
|
||||
|
||||
return JSONResponse(
|
||||
@@ -495,7 +468,7 @@ async def vector_visualization_search(request: Request) -> JSONResponse:
|
||||
"query_embed_ms": round(query_embed_duration * 1000, 2),
|
||||
"pca_ms": round(pca_duration * 1000, 2),
|
||||
"num_results": len(search_results),
|
||||
"num_chunk_vectors": len(chunk_vectors),
|
||||
"num_doc_vectors": len(doc_vectors),
|
||||
},
|
||||
}
|
||||
)
|
||||
@@ -544,118 +517,77 @@ async def chunk_context_endpoint(request: Request) -> JSONResponse:
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
# Type assertions - we validated these above
|
||||
assert doc_type is not None
|
||||
assert doc_id is not None
|
||||
assert start_str is not None
|
||||
assert end_str is not None
|
||||
|
||||
start = int(start_str)
|
||||
end = int(end_str)
|
||||
# Convert doc_id to int (all document types use int IDs)
|
||||
doc_id_int = int(doc_id)
|
||||
|
||||
# Get authenticated Nextcloud client
|
||||
# Currently only support notes
|
||||
if doc_type != "note":
|
||||
return JSONResponse(
|
||||
{"success": False, "error": f"Unsupported doc_type: {doc_type}"},
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
# Get authenticated HTTP client and fetch note
|
||||
from nextcloud_mcp_server.auth.userinfo_routes import (
|
||||
_get_authenticated_client_for_userinfo,
|
||||
)
|
||||
from nextcloud_mcp_server.search.context import get_chunk_with_context
|
||||
from nextcloud_mcp_server.client.notes import NotesClient
|
||||
|
||||
# Use context expansion module to fetch chunk with surrounding context
|
||||
async with await _get_authenticated_client_for_userinfo(request) as nc_client:
|
||||
chunk_context = await get_chunk_with_context(
|
||||
nc_client=nc_client,
|
||||
user_id=request.user.display_name, # User ID from auth
|
||||
doc_id=doc_id_int,
|
||||
doc_type=doc_type,
|
||||
chunk_start=start,
|
||||
chunk_end=end,
|
||||
context_chars=context_chars,
|
||||
)
|
||||
# Get username from request auth
|
||||
username = (
|
||||
request.user.display_name
|
||||
if hasattr(request.user, "display_name")
|
||||
else "unknown"
|
||||
)
|
||||
|
||||
# Check if context expansion succeeded
|
||||
if chunk_context is None:
|
||||
# Create notes client with authenticated HTTP client
|
||||
http_client = await _get_authenticated_client_for_userinfo(request)
|
||||
notes_client = NotesClient(http_client, username)
|
||||
|
||||
# Fetch full note content
|
||||
note = await notes_client.get_note(int(doc_id))
|
||||
full_content = f"{note['title']}\n\n{note['content']}"
|
||||
|
||||
# Validate offsets
|
||||
if start < 0 or end > len(full_content) or start >= end:
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": False,
|
||||
"error": f"Failed to fetch chunk context for {doc_type} {doc_id}",
|
||||
"error": f"Invalid offsets: start={start}, end={end}, content_length={len(full_content)}",
|
||||
},
|
||||
status_code=404,
|
||||
status_code=400,
|
||||
)
|
||||
|
||||
# Extract chunk
|
||||
chunk_text = full_content[start:end]
|
||||
|
||||
# Extract context before and after
|
||||
before_start = max(0, start - context_chars)
|
||||
before_context = full_content[before_start:start]
|
||||
|
||||
after_end = min(len(full_content), end + context_chars)
|
||||
after_context = full_content[end:after_end]
|
||||
|
||||
# Determine if there's more content
|
||||
has_more_before = before_start > 0
|
||||
has_more_after = after_end < len(full_content)
|
||||
|
||||
logger.info(
|
||||
f"Fetched chunk context for {doc_type}_{doc_id}: "
|
||||
f"chunk_len={len(chunk_context.chunk_text)}, "
|
||||
f"before_len={len(chunk_context.before_context)}, "
|
||||
f"after_len={len(chunk_context.after_context)}"
|
||||
f"chunk_len={len(chunk_text)}, before_len={len(before_context)}, "
|
||||
f"after_len={len(after_context)}"
|
||||
)
|
||||
|
||||
# For PDF files, also fetch the highlighted page image from Qdrant
|
||||
highlighted_page_image = None
|
||||
page_number = None
|
||||
if doc_type == "file":
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
settings = get_settings()
|
||||
qdrant_client = await get_qdrant_client()
|
||||
username = request.user.display_name
|
||||
|
||||
# Query for this specific chunk's highlighted image
|
||||
points_response = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
get_placeholder_filter(),
|
||||
FieldCondition(
|
||||
key="doc_id", match=MatchValue(value=doc_id_int)
|
||||
),
|
||||
FieldCondition(
|
||||
key="user_id", match=MatchValue(value=username)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_start_offset", match=MatchValue(value=start)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_end_offset", match=MatchValue(value=end)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_vectors=False,
|
||||
with_payload=["highlighted_page_image", "page_number"],
|
||||
)
|
||||
|
||||
points = points_response[0]
|
||||
if points and points[0].payload:
|
||||
highlighted_page_image = points[0].payload.get(
|
||||
"highlighted_page_image"
|
||||
)
|
||||
page_number = points[0].payload.get("page_number")
|
||||
if highlighted_page_image:
|
||||
logger.info(
|
||||
f"Found highlighted image for chunk: "
|
||||
f"page={page_number}, image_size={len(highlighted_page_image)}"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to fetch highlighted image: {e}")
|
||||
|
||||
# Return response compatible with frontend expectations
|
||||
response_data: dict = {
|
||||
"success": True,
|
||||
"chunk_text": chunk_context.chunk_text,
|
||||
"before_context": chunk_context.before_context,
|
||||
"after_context": chunk_context.after_context,
|
||||
"has_more_before": chunk_context.has_before_truncation,
|
||||
"has_more_after": chunk_context.has_after_truncation,
|
||||
}
|
||||
|
||||
# Add image data if available
|
||||
if highlighted_page_image:
|
||||
response_data["highlighted_page_image"] = highlighted_page_image
|
||||
response_data["page_number"] = page_number
|
||||
|
||||
return JSONResponse(response_data)
|
||||
return JSONResponse(
|
||||
{
|
||||
"success": True,
|
||||
"chunk_text": chunk_text,
|
||||
"before_context": before_context,
|
||||
"after_context": after_context,
|
||||
"has_more_before": has_more_before,
|
||||
"has_more_after": has_more_after,
|
||||
}
|
||||
)
|
||||
|
||||
except ValueError as e:
|
||||
logger.error(f"Invalid parameter format: {e}")
|
||||
|
||||
@@ -139,7 +139,6 @@ async def _get_authenticated_client(request: Request) -> httpx.AsyncClient:
|
||||
raise RuntimeError("BasicAuth credentials not configured")
|
||||
|
||||
assert nextcloud_host is not None # Type narrowing for type checker
|
||||
assert username is not None and password is not None # Type narrowing
|
||||
return httpx.AsyncClient(
|
||||
base_url=nextcloud_host,
|
||||
auth=(username, password),
|
||||
|
||||
+3
-193
@@ -29,9 +29,9 @@ from .app import get_app
|
||||
@click.option(
|
||||
"--transport",
|
||||
"-t",
|
||||
default="streamable-http",
|
||||
default="sse",
|
||||
show_default=True,
|
||||
type=click.Choice(["streamable-http", "http"]),
|
||||
type=click.Choice(["sse", "streamable-http", "http"]),
|
||||
help="MCP transport protocol",
|
||||
)
|
||||
@click.option(
|
||||
@@ -253,195 +253,5 @@ def run(
|
||||
)
|
||||
|
||||
|
||||
@click.group()
|
||||
def db():
|
||||
"""Database migration management commands."""
|
||||
pass
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database (can also use TOKEN_STORAGE_DB env var)",
|
||||
)
|
||||
@click.option(
|
||||
"--revision",
|
||||
"-r",
|
||||
default="head",
|
||||
show_default=True,
|
||||
help="Target revision (default: head for latest)",
|
||||
)
|
||||
def upgrade(database_path: str, revision: str):
|
||||
"""Upgrade database to a specific revision.
|
||||
|
||||
\b
|
||||
Examples:
|
||||
# Upgrade to latest version
|
||||
$ nextcloud-mcp-server db upgrade
|
||||
|
||||
# Upgrade to specific revision
|
||||
$ nextcloud-mcp-server db upgrade --revision 001
|
||||
|
||||
# Use custom database path
|
||||
$ nextcloud-mcp-server db upgrade -d /path/to/tokens.db
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import upgrade_database
|
||||
|
||||
try:
|
||||
click.echo(f"Upgrading database to revision: {revision}")
|
||||
upgrade_database(database_path, revision)
|
||||
click.echo(click.style("✓ Database upgraded successfully", fg="green"))
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"✗ Upgrade failed: {e}", fg="red"), err=True)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database",
|
||||
)
|
||||
@click.option(
|
||||
"--revision",
|
||||
"-r",
|
||||
default="-1",
|
||||
show_default=True,
|
||||
help="Target revision (default: -1 for previous version)",
|
||||
)
|
||||
@click.confirmation_option(
|
||||
prompt="Are you sure you want to downgrade the database? This may result in data loss."
|
||||
)
|
||||
def downgrade(database_path: str, revision: str):
|
||||
"""Downgrade database to a specific revision.
|
||||
|
||||
WARNING: This may result in data loss! Use with caution.
|
||||
|
||||
\b
|
||||
Examples:
|
||||
# Downgrade by one version
|
||||
$ nextcloud-mcp-server db downgrade
|
||||
|
||||
# Downgrade to specific revision
|
||||
$ nextcloud-mcp-server db downgrade --revision 001
|
||||
|
||||
# Downgrade to base (empty database)
|
||||
$ nextcloud-mcp-server db downgrade --revision base
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import downgrade_database
|
||||
|
||||
try:
|
||||
click.echo(f"Downgrading database to revision: {revision}")
|
||||
downgrade_database(database_path, revision)
|
||||
click.echo(click.style("✓ Database downgraded successfully", fg="green"))
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"✗ Downgrade failed: {e}", fg="red"), err=True)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database",
|
||||
)
|
||||
def current(database_path: str):
|
||||
"""Show current database revision.
|
||||
|
||||
\b
|
||||
Example:
|
||||
$ nextcloud-mcp-server db current
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import get_current_revision
|
||||
|
||||
try:
|
||||
revision = get_current_revision(database_path)
|
||||
if revision:
|
||||
click.echo(f"Current revision: {click.style(revision, fg='cyan')}")
|
||||
else:
|
||||
click.echo(
|
||||
click.style(
|
||||
"Database is not versioned (no alembic_version table)", fg="yellow"
|
||||
)
|
||||
)
|
||||
except Exception as e:
|
||||
click.echo(
|
||||
click.style(f"✗ Failed to get current revision: {e}", fg="red"), err=True
|
||||
)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.option(
|
||||
"--database-path",
|
||||
"-d",
|
||||
envvar="TOKEN_STORAGE_DB",
|
||||
default="/app/data/tokens.db",
|
||||
show_default=True,
|
||||
help="Path to token storage database",
|
||||
)
|
||||
def history(database_path: str):
|
||||
"""Show migration history.
|
||||
|
||||
\b
|
||||
Example:
|
||||
$ nextcloud-mcp-server db history
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import show_migration_history
|
||||
|
||||
try:
|
||||
click.echo("Migration history:")
|
||||
show_migration_history(database_path)
|
||||
except Exception as e:
|
||||
click.echo(click.style(f"✗ Failed to show history: {e}", fg="red"), err=True)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
@db.command()
|
||||
@click.argument("message")
|
||||
def migrate(message: str):
|
||||
"""Create a new migration script (developers only).
|
||||
|
||||
The MESSAGE argument describes the changes in this migration.
|
||||
|
||||
\b
|
||||
Examples:
|
||||
$ nextcloud-mcp-server db migrate "add user preferences table"
|
||||
$ nextcloud-mcp-server db migrate "add index on refresh_tokens.user_id"
|
||||
|
||||
Note: You must manually edit the generated migration file to add SQL statements.
|
||||
"""
|
||||
from nextcloud_mcp_server.migrations import create_migration
|
||||
|
||||
try:
|
||||
click.echo(f"Creating new migration: {message}")
|
||||
create_migration(message)
|
||||
click.echo(click.style("✓ Migration created successfully", fg="green"))
|
||||
click.echo(
|
||||
"Edit the migration file in alembic/versions/ to add upgrade/downgrade SQL."
|
||||
)
|
||||
except Exception as e:
|
||||
click.echo(
|
||||
click.style(f"✗ Failed to create migration: {e}", fg="red"), err=True
|
||||
)
|
||||
raise click.ClickException(str(e))
|
||||
|
||||
|
||||
# Create CLI group with subcommands
|
||||
cli = click.Group()
|
||||
cli.add_command(run)
|
||||
cli.add_command(db)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cli()
|
||||
run()
|
||||
|
||||
@@ -18,7 +18,6 @@ from .contacts import ContactsClient
|
||||
from .cookbook import CookbookClient
|
||||
from .deck import DeckClient
|
||||
from .groups import GroupsClient
|
||||
from .news import NewsClient
|
||||
from .notes import NotesClient
|
||||
from .sharing import SharingClient
|
||||
from .tables import TablesClient
|
||||
@@ -82,7 +81,6 @@ class NextcloudClient:
|
||||
self.contacts = ContactsClient(self._client, username)
|
||||
self.cookbook = CookbookClient(self._client, username)
|
||||
self.deck = DeckClient(self._client, username)
|
||||
self.news = NewsClient(self._client, username)
|
||||
self.users = UsersClient(self._client, username)
|
||||
self.groups = GroupsClient(self._client, username)
|
||||
self.sharing = SharingClient(self._client, username)
|
||||
@@ -132,75 +130,10 @@ class NextcloudClient:
|
||||
all_notes = self.notes.get_all_notes()
|
||||
return await self._notes_search.search_notes(all_notes, query)
|
||||
|
||||
async def find_files_by_tag(
|
||||
self, tag_name: str, mime_type_filter: str | None = None
|
||||
) -> list[dict]:
|
||||
"""Find files by system tag name, optionally filtered by MIME type.
|
||||
|
||||
This method coordinates tag lookup and file retrieval via WebDAV:
|
||||
1. Look up the tag ID by name
|
||||
2. Get all files with that tag (via REPORT with full metadata)
|
||||
3. Optionally filter by MIME type
|
||||
|
||||
Args:
|
||||
tag_name: Name of the system tag to search for (e.g., "vector-index")
|
||||
mime_type_filter: Optional MIME type filter (e.g., "application/pdf")
|
||||
|
||||
Returns:
|
||||
List of file dictionaries with WebDAV properties (path, size, content_type, etc.)
|
||||
|
||||
Raises:
|
||||
RuntimeError: If tag lookup or file query fails
|
||||
|
||||
Examples:
|
||||
# Find all files with "vector-index" tag
|
||||
files = await nc_client.find_files_by_tag("vector-index")
|
||||
|
||||
# Find only PDFs with the tag
|
||||
pdfs = await nc_client.find_files_by_tag("vector-index", "application/pdf")
|
||||
"""
|
||||
# Look up tag by name using WebDAV
|
||||
tag = await self.webdav.get_tag_by_name(tag_name)
|
||||
if not tag:
|
||||
logger.debug(f"Tag '{tag_name}' not found, returning empty list")
|
||||
return []
|
||||
|
||||
# Get files with this tag (returns full file info from REPORT)
|
||||
files = await self.webdav.get_files_by_tag(tag["id"])
|
||||
if not files:
|
||||
logger.debug(f"No files found with tag '{tag_name}'")
|
||||
return []
|
||||
|
||||
logger.debug(f"Found {len(files)} files with tag '{tag_name}'")
|
||||
|
||||
# Apply MIME type filter if specified
|
||||
if mime_type_filter:
|
||||
filtered_files = [
|
||||
f
|
||||
for f in files
|
||||
if f.get("content_type", "").startswith(mime_type_filter)
|
||||
]
|
||||
logger.info(
|
||||
f"Returning {len(filtered_files)} files with tag '{tag_name}' (filtered by {mime_type_filter})"
|
||||
)
|
||||
return filtered_files
|
||||
|
||||
logger.info(f"Returning {len(files)} files with tag '{tag_name}'")
|
||||
return files
|
||||
|
||||
def _get_webdav_base_path(self) -> str:
|
||||
"""Helper to get the base WebDAV path for the authenticated user."""
|
||||
return f"/remote.php/dav/files/{self.username}"
|
||||
|
||||
async def __aenter__(self):
|
||||
"""Async context manager entry."""
|
||||
return self
|
||||
|
||||
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Async context manager exit - closes all clients."""
|
||||
await self.close()
|
||||
return False # Don't suppress exceptions
|
||||
|
||||
async def close(self):
|
||||
"""Close the HTTP client and CalDAV client."""
|
||||
await self._client.aclose()
|
||||
|
||||
@@ -1,394 +0,0 @@
|
||||
"""Client for Nextcloud News app operations."""
|
||||
|
||||
import logging
|
||||
from enum import IntEnum
|
||||
from typing import Any
|
||||
|
||||
from .base import BaseNextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class NewsItemType(IntEnum):
|
||||
"""Type constants for News API item queries."""
|
||||
|
||||
FEED = 0 # Single feed
|
||||
FOLDER = 1 # Folder and its feeds
|
||||
STARRED = 2 # All starred items
|
||||
ALL = 3 # All items
|
||||
|
||||
|
||||
class NewsClient(BaseNextcloudClient):
|
||||
"""Client for Nextcloud News app operations."""
|
||||
|
||||
app_name = "news"
|
||||
API_BASE = "/apps/news/api/v1-3"
|
||||
|
||||
# --- Folders ---
|
||||
|
||||
async def get_folders(self) -> list[dict[str, Any]]:
|
||||
"""Get all folders."""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/folders")
|
||||
return response.json().get("folders", [])
|
||||
|
||||
async def create_folder(self, name: str) -> dict[str, Any]:
|
||||
"""Create a new folder.
|
||||
|
||||
Args:
|
||||
name: Folder name
|
||||
|
||||
Returns:
|
||||
Created folder data
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 409 if folder name already exists,
|
||||
422 if name is empty
|
||||
"""
|
||||
response = await self._make_request(
|
||||
"POST", f"{self.API_BASE}/folders", json={"name": name}
|
||||
)
|
||||
folders = response.json().get("folders", [])
|
||||
return folders[0] if folders else {}
|
||||
|
||||
async def rename_folder(self, folder_id: int, name: str) -> None:
|
||||
"""Rename a folder.
|
||||
|
||||
Args:
|
||||
folder_id: Folder ID
|
||||
name: New folder name
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if folder not found, 409 if name exists
|
||||
"""
|
||||
await self._make_request(
|
||||
"PUT", f"{self.API_BASE}/folders/{folder_id}", json={"name": name}
|
||||
)
|
||||
|
||||
async def delete_folder(self, folder_id: int) -> None:
|
||||
"""Delete a folder and all its feeds/items.
|
||||
|
||||
Args:
|
||||
folder_id: Folder ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if folder not found
|
||||
"""
|
||||
await self._make_request("DELETE", f"{self.API_BASE}/folders/{folder_id}")
|
||||
|
||||
async def mark_folder_read(self, folder_id: int, newest_item_id: int) -> None:
|
||||
"""Mark all items in a folder as read.
|
||||
|
||||
Args:
|
||||
folder_id: Folder ID
|
||||
newest_item_id: ID of newest item to mark read (prevents marking
|
||||
items user hasn't seen yet)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if folder not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/folders/{folder_id}/read",
|
||||
json={"newestItemId": newest_item_id},
|
||||
)
|
||||
|
||||
# --- Feeds ---
|
||||
|
||||
async def get_feeds(self) -> dict[str, Any]:
|
||||
"""Get all feeds with metadata.
|
||||
|
||||
Returns:
|
||||
Dict with keys:
|
||||
- feeds: List of feed objects
|
||||
- starredCount: Number of starred items
|
||||
- newestItemId: ID of newest item (omitted if no items)
|
||||
"""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/feeds")
|
||||
return response.json()
|
||||
|
||||
async def create_feed(
|
||||
self, url: str, folder_id: int | None = None
|
||||
) -> dict[str, Any]:
|
||||
"""Subscribe to a new feed.
|
||||
|
||||
Args:
|
||||
url: Feed URL
|
||||
folder_id: Optional folder ID (None for root)
|
||||
|
||||
Returns:
|
||||
Created feed data
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 409 if feed already exists, 422 if URL is invalid
|
||||
"""
|
||||
body: dict[str, Any] = {"url": url}
|
||||
if folder_id is not None:
|
||||
body["folderId"] = folder_id
|
||||
response = await self._make_request("POST", f"{self.API_BASE}/feeds", json=body)
|
||||
data = response.json()
|
||||
feeds = data.get("feeds", [])
|
||||
return feeds[0] if feeds else {}
|
||||
|
||||
async def delete_feed(self, feed_id: int) -> None:
|
||||
"""Unsubscribe from a feed (deletes all items).
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request("DELETE", f"{self.API_BASE}/feeds/{feed_id}")
|
||||
|
||||
async def move_feed(self, feed_id: int, folder_id: int | None) -> None:
|
||||
"""Move a feed to a different folder.
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
folder_id: Target folder ID (None for root)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/feeds/{feed_id}/move",
|
||||
json={"folderId": folder_id},
|
||||
)
|
||||
|
||||
async def rename_feed(self, feed_id: int, title: str) -> None:
|
||||
"""Rename a feed.
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
title: New feed title
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/feeds/{feed_id}/rename",
|
||||
json={"feedTitle": title},
|
||||
)
|
||||
|
||||
async def mark_feed_read(self, feed_id: int, newest_item_id: int) -> None:
|
||||
"""Mark all items in a feed as read.
|
||||
|
||||
Args:
|
||||
feed_id: Feed ID
|
||||
newest_item_id: ID of newest item to mark read
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if feed not found
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/feeds/{feed_id}/read",
|
||||
json={"newestItemId": newest_item_id},
|
||||
)
|
||||
|
||||
# --- Items ---
|
||||
|
||||
async def get_items(
|
||||
self,
|
||||
batch_size: int = 50,
|
||||
offset: int = 0,
|
||||
type_: int = NewsItemType.ALL,
|
||||
id_: int = 0,
|
||||
get_read: bool = True,
|
||||
oldest_first: bool = False,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Get items (articles) with filtering.
|
||||
|
||||
Args:
|
||||
batch_size: Number of items to return (-1 for all)
|
||||
offset: Item ID to start after (for pagination)
|
||||
type_: Item type filter (NewsItemType)
|
||||
id_: Feed/folder ID (ignored for STARRED/ALL types)
|
||||
get_read: Include read items
|
||||
oldest_first: Sort oldest first instead of newest
|
||||
|
||||
Returns:
|
||||
List of item objects
|
||||
"""
|
||||
params: dict[str, Any] = {
|
||||
"batchSize": batch_size,
|
||||
"offset": offset,
|
||||
"type": type_,
|
||||
"id": id_,
|
||||
"getRead": str(get_read).lower(),
|
||||
"oldestFirst": str(oldest_first).lower(),
|
||||
}
|
||||
response = await self._make_request(
|
||||
"GET", f"{self.API_BASE}/items", params=params
|
||||
)
|
||||
return response.json().get("items", [])
|
||||
|
||||
async def get_item(self, item_id: int) -> dict[str, Any]:
|
||||
"""Get a specific item by ID.
|
||||
|
||||
Note: The News API doesn't have a direct single-item endpoint,
|
||||
so we fetch all items and filter. For efficiency, consider
|
||||
caching or using get_items with specific feed if known.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Returns:
|
||||
Item data
|
||||
|
||||
Raises:
|
||||
ValueError: If item not found
|
||||
"""
|
||||
# Fetch all items and find the one we need
|
||||
# This is inefficient but the API doesn't provide a direct endpoint
|
||||
items = await self.get_items(batch_size=-1, get_read=True)
|
||||
for item in items:
|
||||
if item.get("id") == item_id:
|
||||
return item
|
||||
raise ValueError(f"Item {item_id} not found")
|
||||
|
||||
async def get_updated_items(
|
||||
self,
|
||||
last_modified: int,
|
||||
type_: int = NewsItemType.ALL,
|
||||
id_: int = 0,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Get items modified since a timestamp (for delta sync).
|
||||
|
||||
Args:
|
||||
last_modified: Unix timestamp (seconds or microseconds)
|
||||
type_: Item type filter
|
||||
id_: Feed/folder ID
|
||||
|
||||
Returns:
|
||||
List of modified items (includes deleted items)
|
||||
"""
|
||||
params: dict[str, Any] = {
|
||||
"lastModified": last_modified,
|
||||
"type": type_,
|
||||
"id": id_,
|
||||
}
|
||||
response = await self._make_request(
|
||||
"GET", f"{self.API_BASE}/items/updated", params=params
|
||||
)
|
||||
return response.json().get("items", [])
|
||||
|
||||
async def mark_item_read(self, item_id: int) -> None:
|
||||
"""Mark a single item as read.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/read")
|
||||
|
||||
async def mark_item_unread(self, item_id: int) -> None:
|
||||
"""Mark a single item as unread.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unread")
|
||||
|
||||
async def star_item(self, item_id: int) -> None:
|
||||
"""Star (favorite) a single item.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/star")
|
||||
|
||||
async def unstar_item(self, item_id: int) -> None:
|
||||
"""Unstar a single item.
|
||||
|
||||
Args:
|
||||
item_id: Item ID
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: 404 if item not found
|
||||
"""
|
||||
await self._make_request("POST", f"{self.API_BASE}/items/{item_id}/unstar")
|
||||
|
||||
async def mark_items_read(self, item_ids: list[int]) -> None:
|
||||
"""Mark multiple items as read.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST", f"{self.API_BASE}/items/read/multiple", json={"itemIds": item_ids}
|
||||
)
|
||||
|
||||
async def mark_items_unread(self, item_ids: list[int]) -> None:
|
||||
"""Mark multiple items as unread.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/items/unread/multiple",
|
||||
json={"itemIds": item_ids},
|
||||
)
|
||||
|
||||
async def star_items(self, item_ids: list[int]) -> None:
|
||||
"""Star multiple items.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST", f"{self.API_BASE}/items/star/multiple", json={"itemIds": item_ids}
|
||||
)
|
||||
|
||||
async def unstar_items(self, item_ids: list[int]) -> None:
|
||||
"""Unstar multiple items.
|
||||
|
||||
Args:
|
||||
item_ids: List of item IDs
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST",
|
||||
f"{self.API_BASE}/items/unstar/multiple",
|
||||
json={"itemIds": item_ids},
|
||||
)
|
||||
|
||||
async def mark_all_read(self, newest_item_id: int) -> None:
|
||||
"""Mark all items as read.
|
||||
|
||||
Args:
|
||||
newest_item_id: ID of newest item to mark read
|
||||
"""
|
||||
await self._make_request(
|
||||
"POST", f"{self.API_BASE}/items/read", json={"newestItemId": newest_item_id}
|
||||
)
|
||||
|
||||
# --- Status ---
|
||||
|
||||
async def get_status(self) -> dict[str, Any]:
|
||||
"""Get News app status and configuration.
|
||||
|
||||
Returns:
|
||||
Dict with version and warnings
|
||||
"""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/status")
|
||||
return response.json()
|
||||
|
||||
async def get_version(self) -> str:
|
||||
"""Get News app version.
|
||||
|
||||
Returns:
|
||||
Version string (e.g., "25.0.0")
|
||||
"""
|
||||
response = await self._make_request("GET", f"{self.API_BASE}/version")
|
||||
return response.json().get("version", "")
|
||||
@@ -821,20 +821,6 @@ class WebDAVClient(BaseNextcloudClient):
|
||||
item["file_id"] = int(value) if value else None
|
||||
elif tag == "favorite":
|
||||
item["is_favorite"] = value == "1"
|
||||
elif tag == "tags":
|
||||
# Tags can be comma-separated or have multiple child elements
|
||||
if value:
|
||||
# Handle comma-separated tags
|
||||
item["tags"] = [
|
||||
t.strip() for t in value.split(",") if t.strip()
|
||||
]
|
||||
else:
|
||||
# Check for child tag elements (alternative format)
|
||||
tag_elements = child.findall(".//{http://owncloud.org/ns}tag")
|
||||
if tag_elements:
|
||||
item["tags"] = [t.text for t in tag_elements if t.text]
|
||||
else:
|
||||
item["tags"] = []
|
||||
elif tag == "permissions":
|
||||
item["permissions"] = value
|
||||
elif tag == "size":
|
||||
@@ -962,576 +948,3 @@ class WebDAVClient(BaseNextcloudClient):
|
||||
properties=properties,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
async def find_by_tag(
|
||||
self, tag_name: str, scope: str = "", limit: Optional[int] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Find files by tag name.
|
||||
|
||||
DEPRECATED: Use NextcloudClient.find_files_by_tag() instead, which uses
|
||||
the proper OCS Tags API rather than WebDAV SEARCH.
|
||||
|
||||
Args:
|
||||
tag_name: Tag to filter by (e.g., "vector-index")
|
||||
scope: Directory path to search in (empty string for user root)
|
||||
limit: Maximum number of results to return
|
||||
|
||||
Returns:
|
||||
List of files/directories with the specified tag
|
||||
|
||||
Examples:
|
||||
# Find all files tagged with "vector-index"
|
||||
results = await find_by_tag("vector-index")
|
||||
|
||||
# Find tagged files in a specific folder
|
||||
results = await find_by_tag("vector-index", scope="Documents")
|
||||
"""
|
||||
# Use LIKE for tag matching since tags can be comma-separated
|
||||
where_conditions = f"""
|
||||
<d:like>
|
||||
<d:prop>
|
||||
<oc:tags/>
|
||||
</d:prop>
|
||||
<d:literal>%{tag_name}%</d:literal>
|
||||
</d:like>
|
||||
"""
|
||||
|
||||
# Request tag property along with standard properties
|
||||
properties = [
|
||||
"displayname",
|
||||
"getcontentlength",
|
||||
"getcontenttype",
|
||||
"getlastmodified",
|
||||
"resourcetype",
|
||||
"getetag",
|
||||
"fileid",
|
||||
"tags",
|
||||
]
|
||||
|
||||
return await self.search_files(
|
||||
scope=scope,
|
||||
where_conditions=where_conditions,
|
||||
properties=properties,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
async def _get_file_info_by_id(self, file_id: int) -> Dict[str, Any]:
|
||||
"""Get file information by Nextcloud file ID using WebDAV.
|
||||
|
||||
Args:
|
||||
file_id: Nextcloud internal file ID
|
||||
|
||||
Returns:
|
||||
File information dictionary with path, size, content_type, etc.
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If file not found or request fails
|
||||
"""
|
||||
# Nextcloud allows accessing files by ID via special meta endpoint
|
||||
meta_path = f"/remote.php/dav/meta/{file_id}/"
|
||||
|
||||
propfind_body = """<?xml version="1.0"?>
|
||||
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
|
||||
<d:prop>
|
||||
<d:displayname/>
|
||||
<d:getcontentlength/>
|
||||
<d:getcontenttype/>
|
||||
<d:getlastmodified/>
|
||||
<d:resourcetype/>
|
||||
<d:getetag/>
|
||||
<oc:fileid/>
|
||||
</d:prop>
|
||||
</d:propfind>"""
|
||||
|
||||
headers = {"Depth": "0", "Content-Type": "text/xml", "OCS-APIRequest": "true"}
|
||||
|
||||
response = await self._make_request(
|
||||
"PROPFIND", meta_path, content=propfind_body, headers=headers
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse the XML response
|
||||
root = ET.fromstring(response.content)
|
||||
responses = root.findall(".//{DAV:}response")
|
||||
|
||||
if not responses:
|
||||
raise RuntimeError(f"File ID {file_id} not found")
|
||||
|
||||
response_elem = responses[0]
|
||||
href = response_elem.find(".//{DAV:}href")
|
||||
if href is None:
|
||||
raise RuntimeError(f"No href in response for file ID {file_id}")
|
||||
|
||||
propstat = response_elem.find(".//{DAV:}propstat")
|
||||
if propstat is None:
|
||||
raise RuntimeError(f"No propstat for file ID {file_id}")
|
||||
|
||||
prop = propstat.find(".//{DAV:}prop")
|
||||
if prop is None:
|
||||
raise RuntimeError(f"No prop for file ID {file_id}")
|
||||
|
||||
# Extract file path from displayname or construct from file ID
|
||||
displayname_elem = prop.find(".//{DAV:}displayname")
|
||||
name = (
|
||||
displayname_elem.text if displayname_elem is not None else f"file_{file_id}"
|
||||
)
|
||||
|
||||
# Get file properties
|
||||
size_elem = prop.find(".//{DAV:}getcontentlength")
|
||||
size = int(size_elem.text) if size_elem is not None and size_elem.text else 0
|
||||
|
||||
content_type_elem = prop.find(".//{DAV:}getcontenttype")
|
||||
content_type = content_type_elem.text if content_type_elem is not None else None
|
||||
|
||||
modified_elem = prop.find(".//{DAV:}getlastmodified")
|
||||
modified = modified_elem.text if modified_elem is not None else None
|
||||
|
||||
etag_elem = prop.find(".//{DAV:}getetag")
|
||||
etag = (
|
||||
etag_elem.text.strip('"')
|
||||
if etag_elem is not None and etag_elem.text
|
||||
else None
|
||||
)
|
||||
|
||||
# Check if it's a directory
|
||||
resourcetype = prop.find(".//{DAV:}resourcetype")
|
||||
is_directory = (
|
||||
resourcetype is not None
|
||||
and resourcetype.find(".//{DAV:}collection") is not None
|
||||
)
|
||||
|
||||
# Try to get actual file path - meta endpoint doesn't give us the real path
|
||||
# so we'll construct a reasonable path from the name
|
||||
# The calling code in NextcloudClient will have the context to determine the actual path
|
||||
file_info = {
|
||||
"name": name,
|
||||
"path": f"/{name}", # Placeholder - caller should use WebDAV to get real path if needed
|
||||
"size": size,
|
||||
"content_type": content_type,
|
||||
"last_modified": modified,
|
||||
"etag": etag,
|
||||
"is_directory": is_directory,
|
||||
"file_id": file_id,
|
||||
}
|
||||
|
||||
logger.debug(f"Retrieved file info for ID {file_id}: {name}")
|
||||
return file_info
|
||||
|
||||
async def get_tag_by_name(self, tag_name: str) -> dict[str, Any] | None:
|
||||
"""Get a system tag by its name via WebDAV.
|
||||
|
||||
Args:
|
||||
tag_name: Name of the tag to find (case-sensitive)
|
||||
|
||||
Returns:
|
||||
Tag dictionary if found, None otherwise
|
||||
"""
|
||||
# Use WebDAV PROPFIND to list all systemtags
|
||||
propfind_body = """<?xml version="1.0"?>
|
||||
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
|
||||
<d:prop>
|
||||
<oc:id/>
|
||||
<oc:display-name/>
|
||||
<oc:user-visible/>
|
||||
<oc:user-assignable/>
|
||||
</d:prop>
|
||||
</d:propfind>"""
|
||||
|
||||
response = await self._client.request(
|
||||
"PROPFIND",
|
||||
"/remote.php/dav/systemtags/",
|
||||
headers={"Depth": "1"},
|
||||
content=propfind_body,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse XML response
|
||||
root = ET.fromstring(response.content)
|
||||
ns = {
|
||||
"d": "DAV:",
|
||||
"oc": "http://owncloud.org/ns",
|
||||
}
|
||||
|
||||
for response_elem in root.findall("d:response", ns):
|
||||
href = response_elem.find("d:href", ns)
|
||||
if href is None or href.text == "/remote.php/dav/systemtags/":
|
||||
# Skip the collection itself
|
||||
continue
|
||||
|
||||
propstat = response_elem.find("d:propstat", ns)
|
||||
if propstat is None:
|
||||
continue
|
||||
|
||||
prop = propstat.find("d:prop", ns)
|
||||
if prop is None:
|
||||
continue
|
||||
|
||||
# Extract tag properties
|
||||
tag_id_elem = prop.find("oc:id", ns)
|
||||
display_name_elem = prop.find("oc:display-name", ns)
|
||||
user_visible_elem = prop.find("oc:user-visible", ns)
|
||||
user_assignable_elem = prop.find("oc:user-assignable", ns)
|
||||
|
||||
if display_name_elem is not None and display_name_elem.text == tag_name:
|
||||
tag_info = {
|
||||
"id": int(tag_id_elem.text)
|
||||
if tag_id_elem is not None and tag_id_elem.text is not None
|
||||
else None,
|
||||
"name": display_name_elem.text,
|
||||
"userVisible": user_visible_elem.text.lower() == "true"
|
||||
if user_visible_elem is not None
|
||||
and user_visible_elem.text is not None
|
||||
else True,
|
||||
"userAssignable": user_assignable_elem.text.lower() == "true"
|
||||
if user_assignable_elem is not None
|
||||
and user_assignable_elem.text is not None
|
||||
else True,
|
||||
}
|
||||
logger.debug(f"Found tag '{tag_name}' with ID {tag_info['id']}")
|
||||
return tag_info
|
||||
|
||||
logger.debug(f"Tag '{tag_name}' not found")
|
||||
return None
|
||||
|
||||
async def get_files_by_tag(self, tag_id: int) -> list[dict[str, Any]]:
|
||||
"""Get all files tagged with a specific system tag via WebDAV REPORT.
|
||||
|
||||
Args:
|
||||
tag_id: Numeric ID of the tag
|
||||
|
||||
Returns:
|
||||
List of file info dictionaries with path, size, content_type, etc.
|
||||
"""
|
||||
# Use WebDAV REPORT method with systemtag filter, requesting all properties
|
||||
report_body = f"""<?xml version="1.0"?>
|
||||
<oc:filter-files xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns" xmlns:nc="http://nextcloud.org/ns">
|
||||
<d:prop>
|
||||
<oc:fileid/>
|
||||
<d:displayname/>
|
||||
<d:getcontentlength/>
|
||||
<d:getcontenttype/>
|
||||
<d:getlastmodified/>
|
||||
<d:getetag/>
|
||||
</d:prop>
|
||||
<oc:filter-rules>
|
||||
<oc:systemtag>{tag_id}</oc:systemtag>
|
||||
</oc:filter-rules>
|
||||
</oc:filter-files>"""
|
||||
|
||||
response = await self._client.request(
|
||||
"REPORT",
|
||||
f"{self._get_webdav_base_path()}/",
|
||||
content=report_body,
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Parse XML response
|
||||
root = ET.fromstring(response.content)
|
||||
ns = {
|
||||
"d": "DAV:",
|
||||
"oc": "http://owncloud.org/ns",
|
||||
}
|
||||
|
||||
files = []
|
||||
for response_elem in root.findall("d:response", ns):
|
||||
# Extract href (file path)
|
||||
href_elem = response_elem.find("d:href", ns)
|
||||
if href_elem is None or not href_elem.text:
|
||||
continue
|
||||
|
||||
propstat = response_elem.find("d:propstat", ns)
|
||||
if propstat is None:
|
||||
continue
|
||||
|
||||
prop = propstat.find("d:prop", ns)
|
||||
if prop is None:
|
||||
continue
|
||||
|
||||
# Extract all properties
|
||||
fileid_elem = prop.find("oc:fileid", ns)
|
||||
displayname_elem = prop.find("d:displayname", ns)
|
||||
contentlength_elem = prop.find("d:getcontentlength", ns)
|
||||
contenttype_elem = prop.find("d:getcontenttype", ns)
|
||||
lastmodified_elem = prop.find("d:getlastmodified", ns)
|
||||
etag_elem = prop.find("d:getetag", ns)
|
||||
|
||||
if fileid_elem is None or not fileid_elem.text:
|
||||
continue
|
||||
|
||||
# Decode href path and extract the file path
|
||||
from urllib.parse import unquote
|
||||
|
||||
href_path = unquote(href_elem.text)
|
||||
# Remove WebDAV prefix to get user-relative path
|
||||
webdav_prefix = f"/remote.php/dav/files/{self.username}/"
|
||||
file_path = href_path.replace(webdav_prefix, "/")
|
||||
|
||||
# Parse last modified timestamp
|
||||
last_modified_timestamp = None
|
||||
if lastmodified_elem is not None and lastmodified_elem.text:
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
try:
|
||||
dt = parsedate_to_datetime(lastmodified_elem.text)
|
||||
last_modified_timestamp = int(dt.timestamp())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
file_info = {
|
||||
"id": int(fileid_elem.text),
|
||||
"path": file_path,
|
||||
"name": displayname_elem.text
|
||||
if displayname_elem is not None
|
||||
else file_path.split("/")[-1],
|
||||
"size": int(contentlength_elem.text)
|
||||
if contentlength_elem is not None and contentlength_elem.text
|
||||
else 0,
|
||||
"content_type": contenttype_elem.text
|
||||
if contenttype_elem is not None
|
||||
else "",
|
||||
"last_modified": lastmodified_elem.text
|
||||
if lastmodified_elem is not None
|
||||
else None,
|
||||
"last_modified_timestamp": last_modified_timestamp,
|
||||
"etag": etag_elem.text if etag_elem is not None else None,
|
||||
}
|
||||
files.append(file_info)
|
||||
|
||||
logger.debug(f"Found {len(files)} files with tag ID {tag_id}")
|
||||
return files
|
||||
|
||||
async def get_file_info(self, path: str) -> dict[str, Any] | None:
|
||||
"""Get file info including file ID via WebDAV PROPFIND.
|
||||
|
||||
Args:
|
||||
path: Path to the file (relative to user's files directory)
|
||||
|
||||
Returns:
|
||||
File info dictionary with id, name, size, content_type, etc.
|
||||
Returns None if file not found.
|
||||
"""
|
||||
webdav_path = f"{self._get_webdav_base_path()}/{path.lstrip('/')}"
|
||||
|
||||
propfind_body = """<?xml version="1.0"?>
|
||||
<d:propfind xmlns:d="DAV:" xmlns:oc="http://owncloud.org/ns">
|
||||
<d:prop>
|
||||
<oc:fileid/>
|
||||
<d:displayname/>
|
||||
<d:getcontentlength/>
|
||||
<d:getcontenttype/>
|
||||
<d:getlastmodified/>
|
||||
<d:getetag/>
|
||||
<d:resourcetype/>
|
||||
</d:prop>
|
||||
</d:propfind>"""
|
||||
|
||||
try:
|
||||
response = await self._client.request(
|
||||
"PROPFIND",
|
||||
webdav_path,
|
||||
headers={"Depth": "0"},
|
||||
content=propfind_body,
|
||||
)
|
||||
response.raise_for_status()
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code == 404:
|
||||
logger.debug(f"File not found: {path}")
|
||||
return None
|
||||
raise
|
||||
|
||||
# Parse XML response
|
||||
root = ET.fromstring(response.content)
|
||||
ns = {
|
||||
"d": "DAV:",
|
||||
"oc": "http://owncloud.org/ns",
|
||||
}
|
||||
|
||||
response_elem = root.find("d:response", ns)
|
||||
if response_elem is None:
|
||||
return None
|
||||
|
||||
propstat = response_elem.find("d:propstat", ns)
|
||||
if propstat is None:
|
||||
return None
|
||||
|
||||
prop = propstat.find("d:prop", ns)
|
||||
if prop is None:
|
||||
return None
|
||||
|
||||
# Extract properties
|
||||
fileid_elem = prop.find("oc:fileid", ns)
|
||||
displayname_elem = prop.find("d:displayname", ns)
|
||||
contentlength_elem = prop.find("d:getcontentlength", ns)
|
||||
contenttype_elem = prop.find("d:getcontenttype", ns)
|
||||
lastmodified_elem = prop.find("d:getlastmodified", ns)
|
||||
etag_elem = prop.find("d:getetag", ns)
|
||||
resourcetype_elem = prop.find("d:resourcetype", ns)
|
||||
|
||||
is_directory = (
|
||||
resourcetype_elem is not None
|
||||
and resourcetype_elem.find("d:collection", ns) is not None
|
||||
)
|
||||
|
||||
file_info = {
|
||||
"id": int(fileid_elem.text)
|
||||
if fileid_elem is not None and fileid_elem.text is not None
|
||||
else None,
|
||||
"path": path,
|
||||
"name": displayname_elem.text
|
||||
if displayname_elem is not None
|
||||
else path.split("/")[-1],
|
||||
"size": int(contentlength_elem.text)
|
||||
if contentlength_elem is not None and contentlength_elem.text
|
||||
else 0,
|
||||
"content_type": contenttype_elem.text
|
||||
if contenttype_elem is not None
|
||||
else "",
|
||||
"last_modified": lastmodified_elem.text
|
||||
if lastmodified_elem is not None
|
||||
else None,
|
||||
"etag": etag_elem.text.strip('"')
|
||||
if etag_elem is not None and etag_elem.text
|
||||
else None,
|
||||
"is_directory": is_directory,
|
||||
}
|
||||
|
||||
logger.debug(f"Got file info for '{path}': id={file_info['id']}")
|
||||
return file_info
|
||||
|
||||
async def create_tag(
|
||||
self,
|
||||
name: str,
|
||||
user_visible: bool = True,
|
||||
user_assignable: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
"""Create a system tag via WebDAV.
|
||||
|
||||
Args:
|
||||
name: Name of the tag to create
|
||||
user_visible: Whether the tag is visible to users
|
||||
user_assignable: Whether users can assign this tag
|
||||
|
||||
Returns:
|
||||
Tag dictionary with id, name, userVisible, userAssignable
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If tag creation fails (409 if already exists)
|
||||
"""
|
||||
# Use WebDAV POST with JSON body to create tag
|
||||
response = await self._client.post(
|
||||
"/remote.php/dav/systemtags/",
|
||||
headers={"Content-Type": "application/json"},
|
||||
json={
|
||||
"name": name,
|
||||
"userVisible": user_visible,
|
||||
"userAssignable": user_assignable,
|
||||
},
|
||||
)
|
||||
response.raise_for_status()
|
||||
|
||||
# Extract tag ID from Content-Location header (e.g., /remote.php/dav/systemtags/42)
|
||||
content_location = response.headers.get("Content-Location", "")
|
||||
tag_id = None
|
||||
if content_location:
|
||||
# Extract the numeric ID from the path
|
||||
try:
|
||||
tag_id = int(content_location.rstrip("/").split("/")[-1])
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
tag_info = {
|
||||
"id": tag_id,
|
||||
"name": name,
|
||||
"userVisible": user_visible,
|
||||
"userAssignable": user_assignable,
|
||||
}
|
||||
|
||||
logger.info(f"Created tag '{name}' with ID {tag_info['id']}")
|
||||
return tag_info
|
||||
|
||||
async def get_or_create_tag(
|
||||
self,
|
||||
name: str,
|
||||
user_visible: bool = True,
|
||||
user_assignable: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
"""Get a tag by name, creating it if it doesn't exist.
|
||||
|
||||
Args:
|
||||
name: Name of the tag
|
||||
user_visible: Whether the tag is visible to users (for creation)
|
||||
user_assignable: Whether users can assign this tag (for creation)
|
||||
|
||||
Returns:
|
||||
Tag dictionary with id, name, userVisible, userAssignable
|
||||
"""
|
||||
# First try to get existing tag
|
||||
existing_tag = await self.get_tag_by_name(name)
|
||||
if existing_tag:
|
||||
logger.debug(f"Tag '{name}' already exists with ID {existing_tag['id']}")
|
||||
return existing_tag
|
||||
|
||||
# Create new tag
|
||||
try:
|
||||
return await self.create_tag(name, user_visible, user_assignable)
|
||||
except HTTPStatusError as e:
|
||||
if e.response.status_code == 409:
|
||||
# Tag was created between our check and creation, fetch it
|
||||
existing_tag = await self.get_tag_by_name(name)
|
||||
if existing_tag:
|
||||
return existing_tag
|
||||
raise
|
||||
|
||||
async def assign_tag_to_file(self, file_id: int, tag_id: int) -> bool:
|
||||
"""Assign a system tag to a file.
|
||||
|
||||
Args:
|
||||
file_id: Numeric file ID
|
||||
tag_id: Numeric tag ID
|
||||
|
||||
Returns:
|
||||
True if tag was assigned successfully (or already assigned)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If tag assignment fails
|
||||
"""
|
||||
response = await self._client.request(
|
||||
"PUT",
|
||||
f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}",
|
||||
headers={"Content-Length": "0"},
|
||||
content=b"",
|
||||
)
|
||||
|
||||
# 201 = Created (new assignment), 409 = Conflict (already assigned)
|
||||
if response.status_code in (201, 409):
|
||||
logger.info(f"Tagged file {file_id} with tag {tag_id}")
|
||||
return True
|
||||
|
||||
response.raise_for_status()
|
||||
return True
|
||||
|
||||
async def remove_tag_from_file(self, file_id: int, tag_id: int) -> bool:
|
||||
"""Remove a system tag from a file.
|
||||
|
||||
Args:
|
||||
file_id: Numeric file ID
|
||||
tag_id: Numeric tag ID
|
||||
|
||||
Returns:
|
||||
True if tag was removed successfully (or wasn't assigned)
|
||||
|
||||
Raises:
|
||||
HTTPStatusError: If tag removal fails
|
||||
"""
|
||||
response = await self._client.request(
|
||||
"DELETE",
|
||||
f"/remote.php/dav/systemtags-relations/files/{file_id}/{tag_id}",
|
||||
)
|
||||
|
||||
# 204 = No Content (removed), 404 = Not Found (wasn't assigned)
|
||||
if response.status_code in (204, 404):
|
||||
logger.info(f"Removed tag {tag_id} from file {file_id}")
|
||||
return True
|
||||
|
||||
response.raise_for_status()
|
||||
return True
|
||||
|
||||
@@ -2,37 +2,8 @@ import logging
|
||||
import logging.config
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
class DeploymentMode(Enum):
|
||||
"""Deployment mode for the MCP server.
|
||||
|
||||
SELF_HOSTED: Full features, environment-based configuration.
|
||||
Supports vector sync, semantic search, admin UI.
|
||||
|
||||
SMITHERY_STATELESS: Stateless mode for Smithery hosting.
|
||||
Session-based configuration, no persistent storage.
|
||||
Excludes semantic search, vector sync, admin UI.
|
||||
"""
|
||||
|
||||
SELF_HOSTED = "self_hosted"
|
||||
SMITHERY_STATELESS = "smithery"
|
||||
|
||||
|
||||
def get_deployment_mode() -> DeploymentMode:
|
||||
"""Detect deployment mode from environment.
|
||||
|
||||
Returns:
|
||||
DeploymentMode.SMITHERY_STATELESS if SMITHERY_DEPLOYMENT=true,
|
||||
otherwise DeploymentMode.SELF_HOSTED (default).
|
||||
"""
|
||||
if os.getenv("SMITHERY_DEPLOYMENT", "false").lower() == "true":
|
||||
return DeploymentMode.SMITHERY_STATELESS
|
||||
return DeploymentMode.SELF_HOSTED
|
||||
|
||||
|
||||
LOGGING_CONFIG = {
|
||||
"version": 1,
|
||||
"disable_existing_loggers": False,
|
||||
@@ -131,14 +102,6 @@ def get_document_processor_config() -> dict[str, Any]:
|
||||
"lang": os.getenv("TESSERACT_LANG", "eng"),
|
||||
}
|
||||
|
||||
# PyMuPDF configuration (local PDF processing)
|
||||
if os.getenv("ENABLE_PYMUPDF", "true").lower() == "true": # Enabled by default
|
||||
config["processors"]["pymupdf"] = {
|
||||
"extract_images": os.getenv("PYMUPDF_EXTRACT_IMAGES", "true").lower()
|
||||
== "true",
|
||||
"image_dir": os.getenv("PYMUPDF_IMAGE_DIR"), # None = use temp directory
|
||||
}
|
||||
|
||||
# Custom processor (via HTTP API)
|
||||
if os.getenv("ENABLE_CUSTOM_PROCESSOR", "false").lower() == "true":
|
||||
custom_url = os.getenv("CUSTOM_PROCESSOR_URL")
|
||||
@@ -163,12 +126,6 @@ def get_document_processor_config() -> dict[str, Any]:
|
||||
class Settings:
|
||||
"""Application settings from environment variables."""
|
||||
|
||||
# Deployment mode (ADR-021: explicit mode selection)
|
||||
# Optional: If not set, mode is auto-detected from other settings
|
||||
# Valid values: single_user_basic, multi_user_basic, oauth_single_audience,
|
||||
# oauth_token_exchange, smithery
|
||||
deployment_mode: Optional[str] = None
|
||||
|
||||
# OAuth/OIDC settings
|
||||
oidc_discovery_url: Optional[str] = None
|
||||
oidc_client_id: Optional[str] = None
|
||||
@@ -193,11 +150,6 @@ class Settings:
|
||||
enable_token_exchange: bool = False
|
||||
enable_offline_access: bool = False
|
||||
|
||||
# Multi-user BasicAuth pass-through mode (ADR-019 interim solution)
|
||||
# When enabled, MCP server extracts BasicAuth credentials from request headers
|
||||
# and passes them through to Nextcloud APIs (no storage, stateless)
|
||||
enable_multi_user_basic_auth: bool = False
|
||||
|
||||
# Token exchange cache settings
|
||||
token_exchange_cache_ttl: int = 300 # seconds (5 minutes default)
|
||||
|
||||
@@ -216,7 +168,6 @@ class Settings:
|
||||
vector_sync_scan_interval: int = 300 # seconds (5 minutes)
|
||||
vector_sync_processor_workers: int = 3
|
||||
vector_sync_queue_max_size: int = 10000
|
||||
vector_sync_user_poll_interval: int = 60 # seconds - OAuth mode user discovery
|
||||
|
||||
# Qdrant settings (mutually exclusive modes)
|
||||
qdrant_url: Optional[str] = None # Network mode: http://qdrant:6333
|
||||
@@ -229,11 +180,6 @@ class Settings:
|
||||
ollama_embedding_model: str = "nomic-embed-text"
|
||||
ollama_verify_ssl: bool = True
|
||||
|
||||
# OpenAI settings (for embeddings)
|
||||
openai_api_key: Optional[str] = None
|
||||
openai_base_url: Optional[str] = None
|
||||
openai_embedding_model: str = "text-embedding-3-small"
|
||||
|
||||
# Document chunking settings (for vector embeddings)
|
||||
document_chunk_size: int = 2048 # Characters per chunk
|
||||
document_chunk_overlap: int = 200 # Overlapping characters between chunks
|
||||
@@ -292,29 +238,6 @@ class Settings:
|
||||
f"DOCUMENT_CHUNK_OVERLAP ({self.document_chunk_overlap}) cannot be negative."
|
||||
)
|
||||
|
||||
def get_embedding_model_name(self) -> str:
|
||||
"""
|
||||
Get the active embedding model name based on provider priority.
|
||||
|
||||
Priority order (same as ProviderRegistry):
|
||||
1. OpenAI - if OPENAI_API_KEY is set
|
||||
2. Ollama - if OLLAMA_BASE_URL is set
|
||||
3. Simple - fallback (returns "simple-384")
|
||||
|
||||
Returns:
|
||||
Active embedding model name
|
||||
"""
|
||||
# Check OpenAI first (higher priority than Ollama in registry)
|
||||
if self.openai_api_key:
|
||||
return self.openai_embedding_model
|
||||
|
||||
# Check Ollama
|
||||
if self.ollama_base_url:
|
||||
return self.ollama_embedding_model
|
||||
|
||||
# Fallback to simple provider indicator
|
||||
return "simple-384"
|
||||
|
||||
def get_collection_name(self) -> str:
|
||||
"""
|
||||
Get Qdrant collection name.
|
||||
@@ -330,9 +253,8 @@ class Settings:
|
||||
Format: {deployment-id}-{model-name}
|
||||
|
||||
Examples:
|
||||
- "my-deployment-nomic-embed-text" (Ollama)
|
||||
- "my-deployment-text-embedding-3-small" (OpenAI)
|
||||
- "mcp-container-openai-text-embedding-3-small" (hostname fallback)
|
||||
- "my-deployment-nomic-embed-text" (OTEL_SERVICE_NAME set)
|
||||
- "mcp-container-all-minilm" (hostname fallback)
|
||||
|
||||
Returns:
|
||||
Collection name string
|
||||
@@ -352,135 +274,10 @@ class Settings:
|
||||
|
||||
# Sanitize deployment ID and model name
|
||||
deployment_id = deployment_id.lower().replace(" ", "-").replace("_", "-")
|
||||
model_name = self.get_embedding_model_name().replace("/", "-").replace(":", "-")
|
||||
model_name = self.ollama_embedding_model.replace("/", "-").replace(":", "-")
|
||||
|
||||
return f"{deployment_id}-{model_name}"
|
||||
|
||||
# ADR-021: Property aliases for new naming convention
|
||||
# These provide the new names while maintaining backward compatibility with old field names
|
||||
|
||||
@property
|
||||
def enable_semantic_search(self) -> bool:
|
||||
"""Semantic search enabled (ADR-021 alias for vector_sync_enabled)."""
|
||||
return self.vector_sync_enabled
|
||||
|
||||
@property
|
||||
def enable_background_operations(self) -> bool:
|
||||
"""Background operations enabled (ADR-021 alias for enable_offline_access)."""
|
||||
return self.enable_offline_access
|
||||
|
||||
|
||||
def _get_semantic_search_enabled() -> bool:
|
||||
"""Get semantic search enabled status, supporting both old and new variable names.
|
||||
|
||||
Supports:
|
||||
- ENABLE_SEMANTIC_SEARCH (new, preferred)
|
||||
- VECTOR_SYNC_ENABLED (old, deprecated)
|
||||
|
||||
Returns:
|
||||
True if semantic search should be enabled
|
||||
"""
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
new_value = os.getenv("ENABLE_SEMANTIC_SEARCH", "").lower() == "true"
|
||||
old_value = os.getenv("VECTOR_SYNC_ENABLED", "").lower() == "true"
|
||||
|
||||
if new_value and old_value:
|
||||
logger.warning(
|
||||
"Both ENABLE_SEMANTIC_SEARCH and VECTOR_SYNC_ENABLED are set. "
|
||||
"Using ENABLE_SEMANTIC_SEARCH. "
|
||||
"VECTOR_SYNC_ENABLED is deprecated and will be removed in v1.0.0."
|
||||
)
|
||||
elif old_value and not new_value:
|
||||
logger.warning(
|
||||
"VECTOR_SYNC_ENABLED is deprecated. "
|
||||
"Please use ENABLE_SEMANTIC_SEARCH instead. "
|
||||
"Support for VECTOR_SYNC_ENABLED will be removed in v1.0.0."
|
||||
)
|
||||
|
||||
return new_value or old_value
|
||||
|
||||
|
||||
def _is_multi_user_mode() -> bool:
|
||||
"""Detect if this is a multi-user deployment mode.
|
||||
|
||||
Multi-user modes are:
|
||||
- Multi-user BasicAuth (ENABLE_MULTI_USER_BASIC_AUTH=true)
|
||||
- OAuth Single-Audience (no username/password set)
|
||||
- OAuth Token Exchange (ENABLE_TOKEN_EXCHANGE=true)
|
||||
|
||||
Single-user modes are:
|
||||
- Single-user BasicAuth (username and password both set)
|
||||
- Smithery Stateless (SMITHERY_DEPLOYMENT=true)
|
||||
|
||||
Returns:
|
||||
True if multi-user mode detected
|
||||
"""
|
||||
# Smithery is always single-user (stateless)
|
||||
if os.getenv("SMITHERY_DEPLOYMENT", "false").lower() == "true":
|
||||
return False
|
||||
|
||||
# Multi-user BasicAuth explicitly enabled
|
||||
if os.getenv("ENABLE_MULTI_USER_BASIC_AUTH", "false").lower() == "true":
|
||||
return True
|
||||
|
||||
# Token exchange implies OAuth multi-user
|
||||
if os.getenv("ENABLE_TOKEN_EXCHANGE", "false").lower() == "true":
|
||||
return True
|
||||
|
||||
# If both username and password are set, it's single-user BasicAuth
|
||||
has_username = bool(os.getenv("NEXTCLOUD_USERNAME"))
|
||||
has_password = bool(os.getenv("NEXTCLOUD_PASSWORD"))
|
||||
if has_username and has_password:
|
||||
return False
|
||||
|
||||
# Otherwise, assume OAuth multi-user (default when no credentials provided)
|
||||
return True
|
||||
|
||||
|
||||
def _get_background_operations_enabled() -> bool:
|
||||
"""Get background operations enabled status with auto-enablement for semantic search.
|
||||
|
||||
Supports:
|
||||
- ENABLE_BACKGROUND_OPERATIONS (new, preferred)
|
||||
- ENABLE_OFFLINE_ACCESS (old, deprecated)
|
||||
- Auto-enabled if ENABLE_SEMANTIC_SEARCH=true in multi-user modes
|
||||
|
||||
Returns:
|
||||
True if background operations should be enabled
|
||||
"""
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Check new and old variable names
|
||||
explicit = os.getenv("ENABLE_BACKGROUND_OPERATIONS", "").lower() == "true"
|
||||
legacy = os.getenv("ENABLE_OFFLINE_ACCESS", "").lower() == "true"
|
||||
|
||||
if explicit and legacy:
|
||||
logger.warning(
|
||||
"Both ENABLE_BACKGROUND_OPERATIONS and ENABLE_OFFLINE_ACCESS are set. "
|
||||
"Using ENABLE_BACKGROUND_OPERATIONS. "
|
||||
"ENABLE_OFFLINE_ACCESS is deprecated and will be removed in v1.0.0."
|
||||
)
|
||||
elif legacy and not explicit:
|
||||
logger.warning(
|
||||
"ENABLE_OFFLINE_ACCESS is deprecated. "
|
||||
"Please use ENABLE_BACKGROUND_OPERATIONS instead. "
|
||||
"Support for ENABLE_OFFLINE_ACCESS will be removed in v1.0.0."
|
||||
)
|
||||
|
||||
# Auto-enable if semantic search is enabled in multi-user mode
|
||||
semantic_search_enabled = _get_semantic_search_enabled()
|
||||
is_multi_user = _is_multi_user_mode()
|
||||
auto_enabled = semantic_search_enabled and is_multi_user
|
||||
|
||||
if auto_enabled and not (explicit or legacy):
|
||||
logger.info(
|
||||
"Automatically enabled background operations for semantic search in multi-user mode. "
|
||||
"Set ENABLE_BACKGROUND_OPERATIONS=false to disable (this will also disable semantic search)."
|
||||
)
|
||||
|
||||
return explicit or legacy or auto_enabled
|
||||
|
||||
|
||||
def get_settings() -> Settings:
|
||||
"""Get application settings from environment variables.
|
||||
@@ -488,13 +285,7 @@ def get_settings() -> Settings:
|
||||
Returns:
|
||||
Settings object with configuration values
|
||||
"""
|
||||
# Get consolidated values with smart dependency resolution
|
||||
enable_semantic_search = _get_semantic_search_enabled()
|
||||
enable_background_operations = _get_background_operations_enabled()
|
||||
|
||||
return Settings(
|
||||
# Deployment mode (ADR-021)
|
||||
deployment_mode=os.getenv("MCP_DEPLOYMENT_MODE"),
|
||||
# OAuth/OIDC settings
|
||||
oidc_discovery_url=os.getenv("OIDC_DISCOVERY_URL"),
|
||||
oidc_client_id=os.getenv("NEXTCLOUD_OIDC_CLIENT_ID"),
|
||||
@@ -515,10 +306,8 @@ def get_settings() -> Settings:
|
||||
enable_token_exchange=(
|
||||
os.getenv("ENABLE_TOKEN_EXCHANGE", "false").lower() == "true"
|
||||
),
|
||||
enable_offline_access=enable_background_operations, # Smart dependency resolution
|
||||
# Multi-user BasicAuth pass-through mode
|
||||
enable_multi_user_basic_auth=(
|
||||
os.getenv("ENABLE_MULTI_USER_BASIC_AUTH", "false").lower() == "true"
|
||||
enable_offline_access=(
|
||||
os.getenv("ENABLE_OFFLINE_ACCESS", "false").lower() == "true"
|
||||
),
|
||||
# Token exchange cache settings
|
||||
token_exchange_cache_ttl=int(os.getenv("TOKEN_EXCHANGE_CACHE_TTL", "300")),
|
||||
@@ -526,7 +315,9 @@ def get_settings() -> Settings:
|
||||
token_encryption_key=os.getenv("TOKEN_ENCRYPTION_KEY"),
|
||||
token_storage_db=os.getenv("TOKEN_STORAGE_DB", "/tmp/tokens.db"),
|
||||
# Vector sync settings (ADR-007)
|
||||
vector_sync_enabled=enable_semantic_search, # Smart dependency resolution
|
||||
vector_sync_enabled=(
|
||||
os.getenv("VECTOR_SYNC_ENABLED", "false").lower() == "true"
|
||||
),
|
||||
vector_sync_scan_interval=int(os.getenv("VECTOR_SYNC_SCAN_INTERVAL", "300")),
|
||||
vector_sync_processor_workers=int(
|
||||
os.getenv("VECTOR_SYNC_PROCESSOR_WORKERS", "3")
|
||||
@@ -534,9 +325,6 @@ def get_settings() -> Settings:
|
||||
vector_sync_queue_max_size=int(
|
||||
os.getenv("VECTOR_SYNC_QUEUE_MAX_SIZE", "10000")
|
||||
),
|
||||
vector_sync_user_poll_interval=int(
|
||||
os.getenv("VECTOR_SYNC_USER_POLL_INTERVAL", "60")
|
||||
),
|
||||
# Qdrant settings
|
||||
qdrant_url=os.getenv("QDRANT_URL"),
|
||||
qdrant_location=os.getenv("QDRANT_LOCATION"),
|
||||
@@ -546,12 +334,6 @@ def get_settings() -> Settings:
|
||||
ollama_base_url=os.getenv("OLLAMA_BASE_URL"),
|
||||
ollama_embedding_model=os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text"),
|
||||
ollama_verify_ssl=os.getenv("OLLAMA_VERIFY_SSL", "true").lower() == "true",
|
||||
# OpenAI settings
|
||||
openai_api_key=os.getenv("OPENAI_API_KEY"),
|
||||
openai_base_url=os.getenv("OPENAI_BASE_URL"),
|
||||
openai_embedding_model=os.getenv(
|
||||
"OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
|
||||
),
|
||||
# Document chunking settings
|
||||
document_chunk_size=int(os.getenv("DOCUMENT_CHUNK_SIZE", "2048")),
|
||||
document_chunk_overlap=int(os.getenv("DOCUMENT_CHUNK_OVERLAP", "200")),
|
||||
|
||||
@@ -1,460 +0,0 @@
|
||||
"""Configuration validation and mode detection for the MCP server.
|
||||
|
||||
This module provides:
|
||||
- Mode detection based on configuration
|
||||
- Configuration validation with clear error messages
|
||||
- Single source of truth for deployment mode requirements
|
||||
|
||||
See ADR-020 for detailed architecture and deployment mode documentation.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
from nextcloud_mcp_server.config import Settings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AuthMode(Enum):
|
||||
"""Authentication mode for the MCP server.
|
||||
|
||||
Determines how users authenticate and how the server accesses Nextcloud.
|
||||
"""
|
||||
|
||||
SINGLE_USER_BASIC = "single_user_basic"
|
||||
MULTI_USER_BASIC = "multi_user_basic"
|
||||
OAUTH_SINGLE_AUDIENCE = "oauth_single"
|
||||
OAUTH_TOKEN_EXCHANGE = "oauth_exchange"
|
||||
SMITHERY_STATELESS = "smithery"
|
||||
|
||||
|
||||
@dataclass
|
||||
class ModeRequirements:
|
||||
"""Requirements for a deployment mode.
|
||||
|
||||
Attributes:
|
||||
required: Configuration variables that must be set
|
||||
optional: Configuration variables that may be set
|
||||
forbidden: Configuration variables that should not be set
|
||||
conditional: Additional requirements based on feature flags
|
||||
Format: {feature_flag: [required_vars]}
|
||||
description: Human-readable description of the mode
|
||||
"""
|
||||
|
||||
required: list[str]
|
||||
optional: list[str]
|
||||
forbidden: list[str]
|
||||
conditional: dict[str, list[str]]
|
||||
description: str
|
||||
|
||||
|
||||
# Mode requirements definition
|
||||
MODE_REQUIREMENTS: dict[AuthMode, ModeRequirements] = {
|
||||
AuthMode.SINGLE_USER_BASIC: ModeRequirements(
|
||||
required=["nextcloud_host", "nextcloud_username", "nextcloud_password"],
|
||||
optional=[
|
||||
"vector_sync_enabled",
|
||||
"qdrant_url",
|
||||
"qdrant_location",
|
||||
"ollama_base_url",
|
||||
"ollama_embedding_model",
|
||||
"openai_api_key",
|
||||
"openai_embedding_model",
|
||||
"document_chunk_size",
|
||||
"document_chunk_overlap",
|
||||
],
|
||||
forbidden=[
|
||||
"enable_multi_user_basic_auth",
|
||||
"enable_token_exchange",
|
||||
"oidc_client_id",
|
||||
"oidc_client_secret",
|
||||
],
|
||||
conditional={
|
||||
"vector_sync_enabled": [
|
||||
# Either qdrant_url OR qdrant_location (checked in Settings.__post_init__)
|
||||
# At least one embedding provider (ollama_base_url OR openai_api_key)
|
||||
],
|
||||
},
|
||||
description="Single-user deployment with BasicAuth credentials. "
|
||||
"Suitable for personal Nextcloud instances and local development.",
|
||||
),
|
||||
AuthMode.MULTI_USER_BASIC: ModeRequirements(
|
||||
required=["nextcloud_host", "enable_multi_user_basic_auth"],
|
||||
optional=[
|
||||
# Background sync with app passwords (via Astrolabe)
|
||||
"enable_offline_access",
|
||||
"token_encryption_key",
|
||||
"token_storage_db",
|
||||
"oidc_client_id",
|
||||
"oidc_client_secret",
|
||||
# Vector sync
|
||||
"vector_sync_enabled",
|
||||
"qdrant_url",
|
||||
"qdrant_location",
|
||||
"ollama_base_url",
|
||||
"ollama_embedding_model",
|
||||
"openai_api_key",
|
||||
"openai_embedding_model",
|
||||
],
|
||||
forbidden=[
|
||||
"nextcloud_username",
|
||||
"nextcloud_password",
|
||||
"enable_token_exchange",
|
||||
],
|
||||
conditional={
|
||||
"enable_offline_access": [
|
||||
# OAuth credentials validated separately (lines 397-406) with clearer error message
|
||||
"token_encryption_key",
|
||||
"token_storage_db",
|
||||
],
|
||||
# Note: vector_sync_enabled (now ENABLE_SEMANTIC_SEARCH) automatically
|
||||
# enables background operations in multi-user modes. No explicit
|
||||
# enable_offline_access setting required.
|
||||
},
|
||||
description="Multi-user deployment with BasicAuth pass-through. "
|
||||
"Users provide credentials in request headers. "
|
||||
"Optional background sync using app passwords stored via Astrolabe.",
|
||||
),
|
||||
AuthMode.OAUTH_SINGLE_AUDIENCE: ModeRequirements(
|
||||
required=["nextcloud_host"],
|
||||
optional=[
|
||||
# OAuth credentials (uses DCR if not provided)
|
||||
"oidc_client_id",
|
||||
"oidc_client_secret",
|
||||
"oidc_discovery_url",
|
||||
# Offline access
|
||||
"enable_offline_access",
|
||||
"token_encryption_key",
|
||||
"token_storage_db",
|
||||
# Vector sync
|
||||
"vector_sync_enabled",
|
||||
"qdrant_url",
|
||||
"qdrant_location",
|
||||
"ollama_base_url",
|
||||
"ollama_embedding_model",
|
||||
"openai_api_key",
|
||||
"openai_embedding_model",
|
||||
# Scopes
|
||||
"nextcloud_oidc_scopes",
|
||||
],
|
||||
forbidden=[
|
||||
"nextcloud_username",
|
||||
"nextcloud_password",
|
||||
"enable_token_exchange",
|
||||
"enable_multi_user_basic_auth",
|
||||
],
|
||||
conditional={
|
||||
"enable_offline_access": [
|
||||
"token_encryption_key",
|
||||
"token_storage_db",
|
||||
],
|
||||
# Note: vector_sync_enabled (now ENABLE_SEMANTIC_SEARCH) automatically
|
||||
# enables background operations in multi-user modes. No explicit
|
||||
# enable_offline_access setting required.
|
||||
},
|
||||
description="OAuth multi-user deployment with single-audience tokens. "
|
||||
"Tokens work for both MCP server and Nextcloud APIs (pass-through). "
|
||||
"Uses Dynamic Client Registration if credentials not provided.",
|
||||
),
|
||||
AuthMode.OAUTH_TOKEN_EXCHANGE: ModeRequirements(
|
||||
required=["nextcloud_host", "enable_token_exchange"],
|
||||
optional=[
|
||||
# OAuth credentials
|
||||
"oidc_client_id",
|
||||
"oidc_client_secret",
|
||||
"oidc_discovery_url",
|
||||
# Token exchange settings
|
||||
"token_exchange_cache_ttl",
|
||||
# Offline access
|
||||
"enable_offline_access",
|
||||
"token_encryption_key",
|
||||
"token_storage_db",
|
||||
# Vector sync
|
||||
"vector_sync_enabled",
|
||||
"qdrant_url",
|
||||
"qdrant_location",
|
||||
"ollama_base_url",
|
||||
"ollama_embedding_model",
|
||||
"openai_api_key",
|
||||
"openai_embedding_model",
|
||||
],
|
||||
forbidden=[
|
||||
"nextcloud_username",
|
||||
"nextcloud_password",
|
||||
"enable_multi_user_basic_auth",
|
||||
],
|
||||
conditional={
|
||||
"enable_offline_access": [
|
||||
"token_encryption_key",
|
||||
"token_storage_db",
|
||||
],
|
||||
# Note: vector_sync_enabled (now ENABLE_SEMANTIC_SEARCH) automatically
|
||||
# enables background operations in multi-user modes. No explicit
|
||||
# enable_offline_access setting required.
|
||||
},
|
||||
description="OAuth multi-user deployment with token exchange (RFC 8693). "
|
||||
"MCP tokens are separate from Nextcloud tokens. "
|
||||
"Server exchanges MCP token for Nextcloud token on each request.",
|
||||
),
|
||||
AuthMode.SMITHERY_STATELESS: ModeRequirements(
|
||||
required=[], # All config from session URL params
|
||||
optional=[],
|
||||
forbidden=[
|
||||
"nextcloud_host",
|
||||
"nextcloud_username",
|
||||
"nextcloud_password",
|
||||
"enable_multi_user_basic_auth",
|
||||
"enable_token_exchange",
|
||||
"enable_offline_access",
|
||||
"vector_sync_enabled",
|
||||
"oidc_client_id",
|
||||
"oidc_client_secret",
|
||||
],
|
||||
conditional={},
|
||||
description="Stateless multi-tenant deployment for Smithery platform. "
|
||||
"Configuration comes from session URL parameters. "
|
||||
"No persistent storage, no OAuth, no vector sync.",
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def detect_auth_mode(settings: Settings) -> AuthMode:
|
||||
"""Detect authentication mode from configuration.
|
||||
|
||||
Mode detection priority (ADR-021):
|
||||
0. Explicit MCP_DEPLOYMENT_MODE (if set) - NEW in ADR-021
|
||||
1. Smithery (explicit flag)
|
||||
2. Token exchange (most specific OAuth mode)
|
||||
3. Multi-user BasicAuth
|
||||
4. Single-user BasicAuth
|
||||
5. OAuth single-audience (default OAuth mode)
|
||||
|
||||
Args:
|
||||
settings: Application settings
|
||||
|
||||
Returns:
|
||||
Detected AuthMode
|
||||
|
||||
Raises:
|
||||
ValueError: If explicit deployment_mode is invalid or conflicts with detected mode
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ADR-021: Check for explicit deployment mode first
|
||||
if settings.deployment_mode:
|
||||
mode_str = settings.deployment_mode.lower().strip()
|
||||
|
||||
# Map string to AuthMode enum
|
||||
mode_map = {
|
||||
"single_user_basic": AuthMode.SINGLE_USER_BASIC,
|
||||
"multi_user_basic": AuthMode.MULTI_USER_BASIC,
|
||||
"oauth_single_audience": AuthMode.OAUTH_SINGLE_AUDIENCE,
|
||||
"oauth_token_exchange": AuthMode.OAUTH_TOKEN_EXCHANGE,
|
||||
"smithery": AuthMode.SMITHERY_STATELESS,
|
||||
}
|
||||
|
||||
if mode_str not in mode_map:
|
||||
valid_modes = ", ".join(mode_map.keys())
|
||||
raise ValueError(
|
||||
f"Invalid MCP_DEPLOYMENT_MODE: '{settings.deployment_mode}'. "
|
||||
f"Valid values: {valid_modes}"
|
||||
)
|
||||
|
||||
explicit_mode = mode_map[mode_str]
|
||||
logger.info(f"Using explicit deployment mode: {explicit_mode.value}")
|
||||
return explicit_mode
|
||||
|
||||
# Auto-detection (existing behavior)
|
||||
# Check for Smithery mode (explicit environment variable)
|
||||
# Note: This checks the environment directly, not settings
|
||||
# because Smithery mode has no settings-based config
|
||||
if os.getenv("SMITHERY_DEPLOYMENT", "false").lower() == "true":
|
||||
return AuthMode.SMITHERY_STATELESS
|
||||
|
||||
# Check for token exchange (most specific OAuth mode)
|
||||
if settings.enable_token_exchange:
|
||||
return AuthMode.OAUTH_TOKEN_EXCHANGE
|
||||
|
||||
# Check for multi-user BasicAuth
|
||||
if settings.enable_multi_user_basic_auth:
|
||||
return AuthMode.MULTI_USER_BASIC
|
||||
|
||||
# Check for single-user BasicAuth (explicit credentials)
|
||||
if settings.nextcloud_username and settings.nextcloud_password:
|
||||
return AuthMode.SINGLE_USER_BASIC
|
||||
|
||||
# Default: OAuth single-audience mode
|
||||
# This is the safest multi-user mode (no credential storage)
|
||||
return AuthMode.OAUTH_SINGLE_AUDIENCE
|
||||
|
||||
|
||||
def validate_configuration(settings: Settings) -> tuple[AuthMode, list[str]]:
|
||||
"""Validate configuration for detected mode.
|
||||
|
||||
Args:
|
||||
settings: Application settings
|
||||
|
||||
Returns:
|
||||
Tuple of (detected_mode, list_of_errors)
|
||||
Empty list means valid configuration.
|
||||
"""
|
||||
mode = detect_auth_mode(settings)
|
||||
requirements = MODE_REQUIREMENTS[mode]
|
||||
errors: list[str] = []
|
||||
|
||||
logger.debug(f"Validating configuration for mode: {mode.value}")
|
||||
|
||||
# Check required variables
|
||||
for var in requirements.required:
|
||||
value = getattr(settings, var, None)
|
||||
if value is None or (isinstance(value, str) and not value.strip()):
|
||||
errors.append(
|
||||
f"[{mode.value}] Missing required configuration: {var.upper()}"
|
||||
)
|
||||
|
||||
# Check forbidden variables
|
||||
for var in requirements.forbidden:
|
||||
value = getattr(settings, var, None)
|
||||
# For bools, check if True (forbidden means must be False/unset)
|
||||
# For strings, check if non-empty
|
||||
is_set = False
|
||||
if isinstance(value, bool):
|
||||
is_set = value is True
|
||||
elif isinstance(value, str):
|
||||
is_set = bool(value.strip())
|
||||
elif value is not None:
|
||||
is_set = True
|
||||
|
||||
if is_set:
|
||||
errors.append(
|
||||
f"[{mode.value}] Forbidden configuration: {var.upper()} "
|
||||
f"should not be set in this mode"
|
||||
)
|
||||
|
||||
# Check conditional requirements
|
||||
for condition, required_vars in requirements.conditional.items():
|
||||
# Check if the condition is enabled
|
||||
condition_value = getattr(settings, condition, None)
|
||||
is_enabled = False
|
||||
|
||||
if isinstance(condition_value, bool):
|
||||
is_enabled = condition_value is True
|
||||
elif isinstance(condition_value, str):
|
||||
is_enabled = bool(condition_value.strip())
|
||||
elif condition_value is not None:
|
||||
is_enabled = True
|
||||
|
||||
if is_enabled:
|
||||
# Check that all required vars for this condition are set
|
||||
for var in required_vars:
|
||||
value = getattr(settings, var, None)
|
||||
|
||||
# For boolean requirements, check that they are True (not just set)
|
||||
if hasattr(Settings, var):
|
||||
field_type = type(getattr(Settings(), var, None))
|
||||
if field_type is bool:
|
||||
if value is not True:
|
||||
errors.append(
|
||||
f"[{mode.value}] {var.upper()} must be enabled when "
|
||||
f"{condition.upper()} is enabled"
|
||||
)
|
||||
continue
|
||||
|
||||
# For non-boolean requirements, check that they are set
|
||||
if value is None or (isinstance(value, str) and not value.strip()):
|
||||
errors.append(
|
||||
f"[{mode.value}] {var.upper()} is required when "
|
||||
f"{condition.upper()} is enabled"
|
||||
)
|
||||
|
||||
# Special validations for specific modes
|
||||
if mode == AuthMode.SINGLE_USER_BASIC:
|
||||
# Validate that NEXTCLOUD_HOST doesn't have trailing slash
|
||||
if settings.nextcloud_host and settings.nextcloud_host.endswith("/"):
|
||||
errors.append(
|
||||
f"[{mode.value}] NEXTCLOUD_HOST should not have trailing slash: "
|
||||
f"{settings.nextcloud_host}"
|
||||
)
|
||||
|
||||
if mode in [
|
||||
AuthMode.OAUTH_SINGLE_AUDIENCE,
|
||||
AuthMode.OAUTH_TOKEN_EXCHANGE,
|
||||
]:
|
||||
# If OAuth credentials not provided, DCR must be available
|
||||
# (This is a runtime check, not a config check, so we just warn)
|
||||
if not settings.oidc_client_id or not settings.oidc_client_secret:
|
||||
logger.info(
|
||||
f"[{mode.value}] OAuth credentials not configured. "
|
||||
"Will attempt Dynamic Client Registration (DCR) at startup."
|
||||
)
|
||||
|
||||
if mode == AuthMode.MULTI_USER_BASIC:
|
||||
# If background operations enabled, check for OAuth credentials (for app password retrieval)
|
||||
# Allow DCR as fallback, just like OAuth modes
|
||||
if settings.enable_offline_access:
|
||||
if not settings.oidc_client_id or not settings.oidc_client_secret:
|
||||
logger.info(
|
||||
f"[{mode.value}] OAuth credentials not configured. "
|
||||
"Will attempt Dynamic Client Registration (DCR) at startup "
|
||||
"(required for app password retrieval via Astrolabe)."
|
||||
)
|
||||
|
||||
# Note: Vector sync no longer requires explicit ENABLE_OFFLINE_ACCESS setting
|
||||
# ENABLE_SEMANTIC_SEARCH (formerly VECTOR_SYNC_ENABLED) automatically enables
|
||||
# background operations in multi-user modes via smart dependency resolution
|
||||
# in config.py
|
||||
|
||||
# Note: Embedding provider validation removed - Simple provider is always
|
||||
# available as fallback (ADR-015). Users can optionally configure Ollama or OpenAI
|
||||
# for better quality embeddings.
|
||||
|
||||
return mode, errors
|
||||
|
||||
|
||||
def get_mode_summary(mode: AuthMode) -> str:
|
||||
"""Get human-readable summary of a deployment mode.
|
||||
|
||||
Args:
|
||||
mode: Deployment mode
|
||||
|
||||
Returns:
|
||||
Multi-line string describing the mode
|
||||
"""
|
||||
requirements = MODE_REQUIREMENTS[mode]
|
||||
|
||||
summary_lines = [
|
||||
f"Mode: {mode.value}",
|
||||
f"Description: {requirements.description}",
|
||||
"",
|
||||
"Required configuration:",
|
||||
]
|
||||
|
||||
if requirements.required:
|
||||
for var in requirements.required:
|
||||
summary_lines.append(f" - {var.upper()}")
|
||||
else:
|
||||
summary_lines.append(" (none - configured via session)")
|
||||
|
||||
summary_lines.append("")
|
||||
summary_lines.append("Optional configuration:")
|
||||
|
||||
if requirements.optional:
|
||||
for var in requirements.optional:
|
||||
summary_lines.append(f" - {var.upper()}")
|
||||
else:
|
||||
summary_lines.append(" (none)")
|
||||
|
||||
if requirements.conditional:
|
||||
summary_lines.append("")
|
||||
summary_lines.append("Conditional requirements:")
|
||||
for condition, vars in requirements.conditional.items():
|
||||
summary_lines.append(f" When {condition.upper()} is enabled:")
|
||||
for var in vars:
|
||||
summary_lines.append(f" - {var.upper()}")
|
||||
|
||||
return "\n".join(summary_lines)
|
||||
@@ -1,37 +1,21 @@
|
||||
"""Helper functions for accessing context in MCP tools."""
|
||||
|
||||
import logging
|
||||
|
||||
from httpx import BasicAuth
|
||||
from mcp.server.fastmcp import Context
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
from nextcloud_mcp_server.config import (
|
||||
DeploymentMode,
|
||||
get_deployment_mode,
|
||||
get_settings,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
|
||||
|
||||
async def get_client(ctx: Context) -> NextcloudClient:
|
||||
"""
|
||||
Get the appropriate Nextcloud client based on authentication mode.
|
||||
|
||||
ADR-016 compliant implementation supporting three deployment modes:
|
||||
|
||||
1. Smithery stateless mode (SMITHERY_DEPLOYMENT=true):
|
||||
Create client from session configuration (nextcloud_url, username, app_password)
|
||||
No persistent state - client created per-request from Smithery session config.
|
||||
|
||||
2. BasicAuth mode: Returns shared client from lifespan context
|
||||
|
||||
3. OAuth mode:
|
||||
a. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
|
||||
Token already contains both MCP and Nextcloud audiences - use directly
|
||||
b. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
|
||||
Exchange MCP token for Nextcloud token via RFC 8693
|
||||
ADR-005 compliant implementation supporting two modes:
|
||||
1. BasicAuth mode: Returns shared client from lifespan context
|
||||
2. Multi-audience mode (ENABLE_TOKEN_EXCHANGE=false, default):
|
||||
Token already contains both MCP and Nextcloud audiences - use directly
|
||||
3. Token exchange mode (ENABLE_TOKEN_EXCHANGE=true):
|
||||
Exchange MCP token for Nextcloud token via RFC 8693
|
||||
|
||||
SECURITY: Token passthrough has been REMOVED. All OAuth modes validate
|
||||
proper token audiences per MCP Security Best Practices specification.
|
||||
@@ -40,7 +24,7 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
by the MCP server via @require_scopes decorator, not by the IdP.
|
||||
|
||||
This function automatically detects the authentication mode by checking
|
||||
the deployment mode and type of the lifespan context.
|
||||
the type of the lifespan context.
|
||||
|
||||
Args:
|
||||
ctx: MCP request context
|
||||
@@ -50,7 +34,6 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
|
||||
Raises:
|
||||
AttributeError: If context doesn't contain expected data
|
||||
ValueError: If Smithery mode but session config is missing required fields
|
||||
|
||||
Example:
|
||||
```python
|
||||
@@ -60,18 +43,7 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
return await client.capabilities()
|
||||
```
|
||||
"""
|
||||
deployment_mode = get_deployment_mode()
|
||||
|
||||
# ADR-016: Smithery stateless mode - create client from session config
|
||||
if deployment_mode == DeploymentMode.SMITHERY_STATELESS:
|
||||
return _get_client_from_session_config(ctx)
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
# Multi-user BasicAuth pass-through mode - extract credentials from request
|
||||
if settings.enable_multi_user_basic_auth:
|
||||
return _get_client_from_basic_auth(ctx)
|
||||
|
||||
lifespan_ctx = ctx.request_context.lifespan_context
|
||||
|
||||
# BasicAuth mode - use shared client (no token exchange)
|
||||
@@ -103,146 +75,3 @@ async def get_client(ctx: Context) -> NextcloudClient:
|
||||
f"Lifespan context does not have 'client' or 'nextcloud_host' attribute. "
|
||||
f"Type: {type(lifespan_ctx)}"
|
||||
)
|
||||
|
||||
|
||||
def _get_client_from_session_config(ctx: Context) -> NextcloudClient:
|
||||
"""
|
||||
Create NextcloudClient from Smithery session configuration.
|
||||
|
||||
ADR-016: In Smithery stateless mode, each request includes session config
|
||||
with the user's Nextcloud credentials. This function creates a fresh client
|
||||
for each request - no state is persisted between requests.
|
||||
|
||||
For container runtime, config is extracted from URL query parameters by
|
||||
SmitheryConfigMiddleware and stored in a context variable.
|
||||
|
||||
Expected session config fields (from Smithery configSchema):
|
||||
- nextcloud_url: str - Nextcloud instance URL (required)
|
||||
- username: str - Nextcloud username (required)
|
||||
- app_password: str - Nextcloud app password (required)
|
||||
|
||||
Args:
|
||||
ctx: MCP request context (not used directly for Smithery config)
|
||||
|
||||
Returns:
|
||||
NextcloudClient configured with session credentials
|
||||
|
||||
Raises:
|
||||
ValueError: If required session config fields are missing
|
||||
"""
|
||||
# ADR-016: Get session config from context variable (set by SmitheryConfigMiddleware)
|
||||
from nextcloud_mcp_server.app import get_smithery_session_config
|
||||
|
||||
session_config = get_smithery_session_config()
|
||||
|
||||
if session_config is None:
|
||||
raise ValueError(
|
||||
"Session configuration required in Smithery mode. "
|
||||
"Ensure nextcloud_url, username, and app_password are provided as URL query parameters."
|
||||
)
|
||||
|
||||
# Extract required fields - config is always a dict from SmitheryConfigMiddleware
|
||||
nextcloud_url = session_config.get("nextcloud_url")
|
||||
username = session_config.get("username")
|
||||
app_password = session_config.get("app_password")
|
||||
|
||||
# Validate required fields
|
||||
missing_fields = []
|
||||
if not nextcloud_url:
|
||||
missing_fields.append("nextcloud_url")
|
||||
if not username:
|
||||
missing_fields.append("username")
|
||||
if not app_password:
|
||||
missing_fields.append("app_password")
|
||||
|
||||
if missing_fields:
|
||||
raise ValueError(
|
||||
f"Missing required session config fields: {', '.join(missing_fields)}. "
|
||||
f"Configure these in the Smithery connection settings."
|
||||
)
|
||||
|
||||
# Type assertions after validation (for type checker)
|
||||
# These are guaranteed to be str after the missing_fields check above
|
||||
assert nextcloud_url is not None
|
||||
assert username is not None
|
||||
assert app_password is not None
|
||||
|
||||
# Validate URL format
|
||||
if not nextcloud_url.startswith(("http://", "https://")):
|
||||
raise ValueError(
|
||||
f"Invalid nextcloud_url: {nextcloud_url}. "
|
||||
f"Must start with http:// or https://"
|
||||
)
|
||||
|
||||
logger.debug(f"Creating Smithery client for {nextcloud_url} as {username}")
|
||||
|
||||
# Create client with session credentials using BasicAuth
|
||||
return NextcloudClient(
|
||||
base_url=nextcloud_url,
|
||||
username=username,
|
||||
auth=BasicAuth(username, app_password),
|
||||
)
|
||||
|
||||
|
||||
def _get_client_from_basic_auth(ctx: Context) -> NextcloudClient:
|
||||
"""
|
||||
Create NextcloudClient from BasicAuth credentials in request headers.
|
||||
|
||||
For multi-user BasicAuth pass-through mode, this function extracts
|
||||
username/password from the Authorization: Basic header (stored by
|
||||
BasicAuthMiddleware) and creates a client that passes these credentials
|
||||
through to Nextcloud APIs.
|
||||
|
||||
The credentials are NOT stored persistently - they exist only for the
|
||||
duration of this request (stateless).
|
||||
|
||||
Args:
|
||||
ctx: MCP request context with basic_auth in request state
|
||||
|
||||
Returns:
|
||||
NextcloudClient configured with BasicAuth credentials
|
||||
|
||||
Raises:
|
||||
ValueError: If BasicAuth credentials not found in request or if
|
||||
NEXTCLOUD_HOST is not configured
|
||||
"""
|
||||
settings = get_settings()
|
||||
|
||||
# Validate that NEXTCLOUD_HOST is configured
|
||||
if not settings.nextcloud_host:
|
||||
raise ValueError(
|
||||
"NEXTCLOUD_HOST environment variable must be set for multi-user BasicAuth mode"
|
||||
)
|
||||
|
||||
# Extract BasicAuth credentials from request state (set by BasicAuthMiddleware)
|
||||
# Access scope through the request object
|
||||
scope = getattr(ctx.request_context.request, "scope", None)
|
||||
if scope is None:
|
||||
raise ValueError("Request scope not available in context")
|
||||
|
||||
request_state = scope.get("state", {})
|
||||
basic_auth = request_state.get("basic_auth")
|
||||
|
||||
if not basic_auth:
|
||||
raise ValueError(
|
||||
"BasicAuth credentials not found in request. "
|
||||
"Ensure Authorization: Basic header is provided with valid credentials."
|
||||
)
|
||||
|
||||
username = basic_auth.get("username")
|
||||
password = basic_auth.get("password")
|
||||
|
||||
if not username or not password:
|
||||
raise ValueError("Invalid BasicAuth credentials - missing username or password")
|
||||
|
||||
logger.debug(
|
||||
f"Creating multi-user BasicAuth client for {settings.nextcloud_host} as {username}"
|
||||
)
|
||||
|
||||
# Create client that passes BasicAuth credentials through to Nextcloud
|
||||
# settings.nextcloud_host is guaranteed to be str after the check above
|
||||
return NextcloudClient(
|
||||
base_url=settings.nextcloud_host,
|
||||
username=username,
|
||||
auth=BasicAuth(username, password),
|
||||
)
|
||||
|
||||
@@ -1,18 +1,12 @@
|
||||
"""Document processing plugins for extracting text from various file formats."""
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
from .pymupdf import PyMuPDFProcessor
|
||||
from .registry import ProcessorRegistry, get_registry
|
||||
|
||||
# Register processors at module initialization
|
||||
_registry = get_registry()
|
||||
_registry.register(PyMuPDFProcessor(), priority=10)
|
||||
|
||||
__all__ = [
|
||||
"DocumentProcessor",
|
||||
"ProcessingResult",
|
||||
"ProcessorError",
|
||||
"ProcessorRegistry",
|
||||
"get_registry",
|
||||
"PyMuPDFProcessor",
|
||||
]
|
||||
|
||||
@@ -1,253 +0,0 @@
|
||||
"""Document processor using PyMuPDF (fitz) library."""
|
||||
|
||||
import logging
|
||||
import pathlib
|
||||
import tempfile
|
||||
from collections.abc import Awaitable, Callable
|
||||
from typing import Any, Optional
|
||||
|
||||
# NOTE: Do NOT call pymupdf.layout.activate() here!
|
||||
# It changes the behavior of pymupdf4llm.to_markdown() when page_chunks=True,
|
||||
# causing it to return a string instead of a list[dict].
|
||||
# See: https://github.com/pymupdf/pymupdf4llm/issues/323
|
||||
import pymupdf
|
||||
import pymupdf4llm
|
||||
|
||||
from .base import DocumentProcessor, ProcessingResult, ProcessorError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PyMuPDFProcessor(DocumentProcessor):
|
||||
"""Document processor using PyMuPDF library for PDF processing.
|
||||
|
||||
PyMuPDF (fitz) is a fast, local PDF processing library that extracts text,
|
||||
metadata, and images without requiring external API calls.
|
||||
|
||||
Features:
|
||||
- Fast text extraction with layout preservation
|
||||
- PDF metadata extraction (title, author, creation date, page count)
|
||||
- Image extraction for future multimodal support
|
||||
- Page number tracking for precise citations
|
||||
"""
|
||||
|
||||
SUPPORTED_TYPES = {
|
||||
"application/pdf",
|
||||
}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
extract_images: bool = True,
|
||||
image_dir: Optional[str | pathlib.Path] = None,
|
||||
):
|
||||
"""Initialize PyMuPDF processor.
|
||||
|
||||
Args:
|
||||
extract_images: Whether to extract embedded images from PDFs
|
||||
image_dir: Directory to store extracted images (defaults to temp directory)
|
||||
"""
|
||||
self.extract_images = extract_images
|
||||
|
||||
if image_dir is None:
|
||||
self.image_dir = pathlib.Path(tempfile.gettempdir()) / "pdf-images"
|
||||
else:
|
||||
self.image_dir = pathlib.Path(image_dir)
|
||||
|
||||
# Create image directory if it doesn't exist
|
||||
if self.extract_images:
|
||||
self.image_dir.mkdir(exist_ok=True, parents=True)
|
||||
logger.info(
|
||||
f"Initialized PyMuPDFProcessor with image extraction to {self.image_dir}"
|
||||
)
|
||||
else:
|
||||
logger.info("Initialized PyMuPDFProcessor without image extraction")
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "pymupdf"
|
||||
|
||||
@property
|
||||
def supported_mime_types(self) -> set[str]:
|
||||
return self.SUPPORTED_TYPES
|
||||
|
||||
async def process(
|
||||
self,
|
||||
content: bytes,
|
||||
content_type: str,
|
||||
filename: Optional[str] = None,
|
||||
options: Optional[dict[str, Any]] = None,
|
||||
progress_callback: Optional[
|
||||
Callable[[float, Optional[float], Optional[str]], Awaitable[None]]
|
||||
] = None,
|
||||
) -> ProcessingResult:
|
||||
"""Process a PDF document and extract text, metadata, and images.
|
||||
|
||||
Args:
|
||||
content: PDF document bytes
|
||||
content_type: MIME type (should be application/pdf)
|
||||
filename: Optional filename for better error messages
|
||||
options: Processing options (currently unused)
|
||||
progress_callback: Optional callback for progress updates
|
||||
|
||||
Returns:
|
||||
ProcessingResult with extracted text and metadata
|
||||
|
||||
Raises:
|
||||
ProcessorError: If PDF processing fails
|
||||
"""
|
||||
import anyio
|
||||
|
||||
try:
|
||||
if progress_callback:
|
||||
await progress_callback(0, 100, "Opening PDF document")
|
||||
|
||||
# Open document and extract metadata in thread
|
||||
doc = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: pymupdf.open("pdf", content)
|
||||
)
|
||||
|
||||
metadata = self._extract_metadata(doc, filename)
|
||||
metadata["file_size"] = len(content)
|
||||
page_count = doc.page_count
|
||||
|
||||
if progress_callback:
|
||||
await progress_callback(10, 100, f"Extracting {page_count} pages")
|
||||
|
||||
# Prepare image directory if needed
|
||||
pdf_image_dir = None
|
||||
if self.extract_images:
|
||||
pdf_id = filename.replace("/", "_") if filename else "unknown"
|
||||
pdf_image_dir = self.image_dir / pdf_id
|
||||
pdf_image_dir.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
# Extract all pages in a single call with page_chunks=True
|
||||
def do_extract() -> list[dict[str, Any]]:
|
||||
# When page_chunks=True, to_markdown returns list[dict] not str
|
||||
return pymupdf4llm.to_markdown( # type: ignore[return-value]
|
||||
doc,
|
||||
write_images=self.extract_images,
|
||||
image_path=pdf_image_dir if self.extract_images else None,
|
||||
page_chunks=True,
|
||||
)
|
||||
|
||||
page_chunks: list[dict[str, Any]] = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
do_extract
|
||||
)
|
||||
|
||||
if progress_callback:
|
||||
await progress_callback(90, 100, "Building result")
|
||||
|
||||
# Extract page texts and build boundaries from chunks
|
||||
page_texts: list[str] = []
|
||||
page_boundaries: list[dict[str, Any]] = []
|
||||
current_offset = 0
|
||||
for chunk in page_chunks:
|
||||
text = chunk.get("text", "")
|
||||
page_num = chunk.get("metadata", {}).get("page", len(page_texts) + 1)
|
||||
page_texts.append(text)
|
||||
page_boundaries.append(
|
||||
{
|
||||
"page": page_num,
|
||||
"start_offset": current_offset,
|
||||
"end_offset": current_offset + len(text),
|
||||
}
|
||||
)
|
||||
current_offset += len(text)
|
||||
|
||||
# Collect image paths
|
||||
image_paths = []
|
||||
if pdf_image_dir and pdf_image_dir.exists():
|
||||
image_paths = [str(p) for p in pdf_image_dir.glob("*")]
|
||||
|
||||
# Build final text and metadata
|
||||
md_text = "".join(page_texts)
|
||||
metadata["has_images"] = len(image_paths) > 0
|
||||
if image_paths:
|
||||
metadata["image_count"] = len(image_paths)
|
||||
metadata["image_paths"] = image_paths
|
||||
metadata["page_boundaries"] = page_boundaries
|
||||
|
||||
# Close document
|
||||
doc.close()
|
||||
|
||||
if progress_callback:
|
||||
await progress_callback(100, 100, "Processing complete")
|
||||
|
||||
logger.info(
|
||||
f"Successfully processed PDF {filename or '<bytes>'}: "
|
||||
f"{metadata['page_count']} pages, {len(md_text)} chars, "
|
||||
f"{metadata.get('image_count', 0)} images"
|
||||
)
|
||||
|
||||
return ProcessingResult(
|
||||
text=md_text,
|
||||
metadata=metadata,
|
||||
processor=self.name,
|
||||
success=True,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Failed to process PDF {filename or '<bytes>'}: {e}"
|
||||
logger.error(error_msg, exc_info=True)
|
||||
raise ProcessorError(error_msg) from e
|
||||
|
||||
def _extract_metadata(
|
||||
self, doc: pymupdf.Document, filename: Optional[str]
|
||||
) -> dict[str, Any]:
|
||||
"""Extract metadata from PDF document.
|
||||
|
||||
Args:
|
||||
doc: Opened PyMuPDF document
|
||||
filename: Optional filename
|
||||
|
||||
Returns:
|
||||
Dictionary with PDF metadata
|
||||
"""
|
||||
metadata: dict[str, Any] = {}
|
||||
|
||||
# Basic document info
|
||||
metadata["page_count"] = doc.page_count
|
||||
metadata["format"] = "PDF 1." + str(
|
||||
doc.pdf_version() if hasattr(doc, "pdf_version") else "?" # type: ignore[call-non-callable]
|
||||
)
|
||||
|
||||
if filename:
|
||||
metadata["filename"] = filename
|
||||
|
||||
# Extract PDF metadata dictionary
|
||||
pdf_metadata = doc.metadata
|
||||
if pdf_metadata:
|
||||
# Standard PDF metadata fields
|
||||
if pdf_metadata.get("title"):
|
||||
metadata["title"] = pdf_metadata["title"]
|
||||
if pdf_metadata.get("author"):
|
||||
metadata["author"] = pdf_metadata["author"]
|
||||
if pdf_metadata.get("subject"):
|
||||
metadata["subject"] = pdf_metadata["subject"]
|
||||
if pdf_metadata.get("keywords"):
|
||||
metadata["keywords"] = pdf_metadata["keywords"]
|
||||
if pdf_metadata.get("creator"):
|
||||
metadata["creator"] = pdf_metadata["creator"]
|
||||
if pdf_metadata.get("producer"):
|
||||
metadata["producer"] = pdf_metadata["producer"]
|
||||
if pdf_metadata.get("creationDate"):
|
||||
metadata["creation_date"] = pdf_metadata["creationDate"]
|
||||
if pdf_metadata.get("modDate"):
|
||||
metadata["modification_date"] = pdf_metadata["modDate"]
|
||||
|
||||
return metadata
|
||||
|
||||
async def health_check(self) -> bool:
|
||||
"""Check if PyMuPDF is available and working.
|
||||
|
||||
Returns:
|
||||
True if processor is ready to use
|
||||
"""
|
||||
try:
|
||||
# Try to create a simple PDF in memory
|
||||
test_doc = pymupdf.open()
|
||||
test_doc.close()
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"PyMuPDF health check failed: {e}")
|
||||
return False
|
||||
@@ -37,9 +37,7 @@ class BM25SparseEmbeddingProvider:
|
||||
|
||||
def encode(self, text: str) -> dict[str, Any]:
|
||||
"""
|
||||
Generate BM25 sparse embedding for a single text (synchronous).
|
||||
|
||||
Note: For async contexts, prefer encode_async() to avoid blocking the event loop.
|
||||
Generate BM25 sparse embedding for a single text.
|
||||
|
||||
Args:
|
||||
text: Input text to encode
|
||||
@@ -55,24 +53,7 @@ class BM25SparseEmbeddingProvider:
|
||||
"values": sparse_embedding.values.tolist(),
|
||||
}
|
||||
|
||||
async def encode_async(self, text: str) -> dict[str, Any]:
|
||||
"""
|
||||
Generate BM25 sparse embedding for a single text (async).
|
||||
|
||||
Runs CPU-bound BM25 encoding in thread pool to avoid blocking the event loop.
|
||||
|
||||
Args:
|
||||
text: Input text to encode
|
||||
|
||||
Returns:
|
||||
Dictionary with 'indices' and 'values' keys for Qdrant sparse vector
|
||||
"""
|
||||
import anyio
|
||||
|
||||
# Run CPU-bound BM25 encoding in thread pool
|
||||
return await anyio.to_thread.run_sync(lambda: self.encode(text)) # type: ignore[attr-defined]
|
||||
|
||||
async def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
|
||||
def encode_batch(self, texts: list[str]) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Generate BM25 sparse embeddings for multiple texts (batched).
|
||||
|
||||
@@ -82,12 +63,7 @@ class BM25SparseEmbeddingProvider:
|
||||
Returns:
|
||||
List of dictionaries with 'indices' and 'values' for each text
|
||||
"""
|
||||
import anyio
|
||||
|
||||
# Run CPU-bound BM25 encoding in thread pool to avoid blocking event loop
|
||||
sparse_embeddings = await anyio.to_thread.run_sync( # type: ignore[attr-defined]
|
||||
lambda: list(self.model.embed(texts))
|
||||
)
|
||||
sparse_embeddings = list(self.model.embed(texts))
|
||||
|
||||
return [
|
||||
{
|
||||
|
||||
@@ -1,192 +0,0 @@
|
||||
"""Database migration utilities for nextcloud-mcp-server.
|
||||
|
||||
This module provides helper functions for managing Alembic database migrations
|
||||
programmatically. It enables automatic migration on application startup and
|
||||
provides CLI integration.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from alembic.config import Config
|
||||
|
||||
from alembic import command
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_alembic_config(database_path: str | Path | None = None) -> Config:
|
||||
"""
|
||||
Get Alembic configuration for programmatic use.
|
||||
|
||||
Works in both development and installed (Docker) modes by using
|
||||
package location instead of alembic.ini file.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file. If None, uses default
|
||||
(/app/data/tokens.db for Docker)
|
||||
|
||||
Returns:
|
||||
Alembic Config object configured for the specified database
|
||||
"""
|
||||
from nextcloud_mcp_server import alembic as alembic_package
|
||||
|
||||
# Use package location (works in both editable and installed modes)
|
||||
if alembic_package.__file__ is None:
|
||||
raise RuntimeError("alembic package __file__ is None")
|
||||
script_location = Path(alembic_package.__file__).parent
|
||||
|
||||
# Create config programmatically (no alembic.ini needed at runtime)
|
||||
config = Config()
|
||||
config.set_main_option("script_location", str(script_location))
|
||||
config.set_main_option("path_separator", "os") # Suppress deprecation warning
|
||||
|
||||
# Set database URL
|
||||
if database_path:
|
||||
db_path = Path(database_path).resolve()
|
||||
else:
|
||||
db_path = Path("/app/data/tokens.db") # Default for Docker
|
||||
|
||||
url = f"sqlite+aiosqlite:///{db_path}"
|
||||
config.set_main_option("sqlalchemy.url", url)
|
||||
|
||||
logger.debug(f"Alembic script location: {script_location}")
|
||||
logger.debug(f"Database: {db_path}")
|
||||
|
||||
return config
|
||||
|
||||
|
||||
def upgrade_database(
|
||||
database_path: str | Path | None = None, revision: str = "head"
|
||||
) -> None:
|
||||
"""
|
||||
Upgrade database to a specific revision.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
revision: Target revision (default: "head" for latest)
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
logger.info(f"Upgrading database to revision: {revision}")
|
||||
command.upgrade(config, revision)
|
||||
logger.info("Database upgrade completed successfully")
|
||||
|
||||
|
||||
def downgrade_database(
|
||||
database_path: str | Path | None = None, revision: str = "-1"
|
||||
) -> None:
|
||||
"""
|
||||
Downgrade database to a specific revision.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
revision: Target revision (default: "-1" for previous version)
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
logger.warning(f"Downgrading database to revision: {revision}")
|
||||
command.downgrade(config, revision)
|
||||
logger.info("Database downgrade completed successfully")
|
||||
|
||||
|
||||
def get_current_revision(database_path: str | Path | None = None) -> str | None:
|
||||
"""
|
||||
Get the current database revision by directly querying the alembic_version table.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
|
||||
Returns:
|
||||
Current revision ID or None if not versioned
|
||||
"""
|
||||
import sqlite3
|
||||
|
||||
if database_path is None:
|
||||
database_path = "/app/data/tokens.db"
|
||||
|
||||
db_path = Path(database_path).resolve()
|
||||
|
||||
if not db_path.exists():
|
||||
logger.debug(f"Database does not exist: {db_path}")
|
||||
return None
|
||||
|
||||
try:
|
||||
# Query alembic_version table directly
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Check if alembic_version table exists
|
||||
cursor.execute(
|
||||
"SELECT name FROM sqlite_master WHERE type='table' AND name='alembic_version'"
|
||||
)
|
||||
has_table = cursor.fetchone() is not None
|
||||
|
||||
if not has_table:
|
||||
conn.close()
|
||||
return None
|
||||
|
||||
# Get current version
|
||||
cursor.execute("SELECT version_num FROM alembic_version")
|
||||
row = cursor.fetchone()
|
||||
conn.close()
|
||||
|
||||
return row[0] if row else None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to get current revision: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def stamp_database(
|
||||
database_path: str | Path | None = None, revision: str = "head"
|
||||
) -> None:
|
||||
"""
|
||||
Stamp database with a specific revision without running migrations.
|
||||
|
||||
This is useful for marking existing databases that were created before
|
||||
Alembic was introduced. It tells Alembic "this database is at revision X"
|
||||
without actually running the migration.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
revision: Revision to stamp (default: "head" for latest)
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
logger.info(f"Stamping database with revision: {revision}")
|
||||
command.stamp(config, revision)
|
||||
logger.info("Database stamped successfully")
|
||||
|
||||
|
||||
def show_migration_history(database_path: str | Path | None = None) -> None:
|
||||
"""
|
||||
Display migration history.
|
||||
|
||||
Args:
|
||||
database_path: Path to SQLite database file
|
||||
"""
|
||||
config = get_alembic_config(database_path)
|
||||
command.history(config, verbose=True)
|
||||
|
||||
|
||||
def create_migration(message: str, autogenerate: bool = False) -> None:
|
||||
"""
|
||||
Create a new migration script.
|
||||
|
||||
Args:
|
||||
message: Description of the migration
|
||||
autogenerate: Whether to attempt auto-generation (requires SQLAlchemy models)
|
||||
|
||||
Note:
|
||||
Since we don't use SQLAlchemy models, autogenerate will be disabled
|
||||
and migrations must be written manually.
|
||||
"""
|
||||
config = get_alembic_config()
|
||||
logger.info(f"Creating new migration: {message}")
|
||||
|
||||
if autogenerate:
|
||||
logger.warning(
|
||||
"Auto-generation is not supported (no SQLAlchemy models). "
|
||||
"Migration will be created with empty upgrade/downgrade functions."
|
||||
)
|
||||
|
||||
command.revision(config, message=message, autogenerate=False)
|
||||
logger.info("Migration created successfully. Edit the file to add SQL statements.")
|
||||
@@ -1,170 +0,0 @@
|
||||
"""Pydantic models for Nextcloud News app responses."""
|
||||
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
from .base import BaseResponse
|
||||
|
||||
|
||||
class NewsFolder(BaseModel):
|
||||
"""Model for a News folder."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Folder ID")
|
||||
name: str = Field(description="Folder name")
|
||||
|
||||
|
||||
class NewsFeed(BaseModel):
|
||||
"""Model for a News feed (RSS/Atom subscription)."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Feed ID")
|
||||
url: str = Field(description="Feed URL")
|
||||
title: str = Field(description="Feed title")
|
||||
favicon_link: str | None = Field(
|
||||
None, alias="faviconLink", description="Favicon URL"
|
||||
)
|
||||
link: str | None = Field(None, description="Website link")
|
||||
added: int = Field(description="Unix timestamp when feed was added")
|
||||
folder_id: int | None = Field(
|
||||
None, alias="folderId", description="Parent folder ID"
|
||||
)
|
||||
unread_count: int = Field(
|
||||
0, alias="unreadCount", description="Number of unread items"
|
||||
)
|
||||
ordering: int = Field(
|
||||
0, description="Feed ordering (0=default, 1=oldest, 2=newest)"
|
||||
)
|
||||
pinned: bool = Field(False, description="Whether feed is pinned to top")
|
||||
update_error_count: int = Field(
|
||||
0, alias="updateErrorCount", description="Consecutive update failures"
|
||||
)
|
||||
last_update_error: str | None = Field(
|
||||
None, alias="lastUpdateError", description="Last update error message"
|
||||
)
|
||||
|
||||
@property
|
||||
def has_errors(self) -> bool:
|
||||
"""Check if feed has update errors."""
|
||||
return self.update_error_count > 0
|
||||
|
||||
|
||||
class NewsItem(BaseModel):
|
||||
"""Model for a News item (article) with full content."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Item ID")
|
||||
guid: str = Field(description="Globally unique identifier")
|
||||
guid_hash: str = Field(alias="guidHash", description="MD5 hash of GUID")
|
||||
url: str | None = Field(None, description="Article URL")
|
||||
title: str = Field(description="Article title")
|
||||
author: str | None = Field(None, description="Article author")
|
||||
pub_date: int | None = Field(
|
||||
None, alias="pubDate", description="Publication timestamp"
|
||||
)
|
||||
body: str | None = Field(None, description="Article content (HTML)")
|
||||
enclosure_mime: str | None = Field(
|
||||
None, alias="enclosureMime", description="Enclosure MIME type"
|
||||
)
|
||||
enclosure_link: str | None = Field(
|
||||
None, alias="enclosureLink", description="Enclosure URL"
|
||||
)
|
||||
media_thumbnail: str | None = Field(
|
||||
None, alias="mediaThumbnail", description="Media thumbnail URL"
|
||||
)
|
||||
media_description: str | None = Field(
|
||||
None, alias="mediaDescription", description="Media description"
|
||||
)
|
||||
feed_id: int = Field(alias="feedId", description="Parent feed ID")
|
||||
unread: bool = Field(True, description="Whether item is unread")
|
||||
starred: bool = Field(False, description="Whether item is starred")
|
||||
rtl: bool = Field(False, description="Right-to-left text")
|
||||
last_modified: int = Field(
|
||||
alias="lastModified", description="Last modification timestamp"
|
||||
)
|
||||
fingerprint: str | None = Field(
|
||||
None, description="Content fingerprint for deduplication"
|
||||
)
|
||||
content_hash: str | None = Field(
|
||||
None, alias="contentHash", description="Content hash"
|
||||
)
|
||||
|
||||
|
||||
class NewsItemSummary(BaseModel):
|
||||
"""Lightweight model for News item list responses."""
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
id: int = Field(description="Item ID")
|
||||
title: str = Field(description="Article title")
|
||||
feed_id: int = Field(alias="feedId", description="Parent feed ID")
|
||||
unread: bool = Field(True, description="Whether item is unread")
|
||||
starred: bool = Field(False, description="Whether item is starred")
|
||||
pub_date: int | None = Field(
|
||||
None, alias="pubDate", description="Publication timestamp"
|
||||
)
|
||||
url: str | None = Field(None, description="Article URL")
|
||||
author: str | None = Field(None, description="Article author")
|
||||
|
||||
|
||||
class NewsStatus(BaseModel):
|
||||
"""Model for News app status."""
|
||||
|
||||
version: str = Field(description="News app version")
|
||||
warnings: dict = Field(default_factory=dict, description="Configuration warnings")
|
||||
|
||||
|
||||
# --- Response Models ---
|
||||
|
||||
|
||||
class ListFoldersResponse(BaseResponse):
|
||||
"""Response model for listing folders."""
|
||||
|
||||
results: List[NewsFolder] = Field(description="List of folders")
|
||||
total_count: int = Field(description="Total number of folders")
|
||||
|
||||
|
||||
class ListFeedsResponse(BaseResponse):
|
||||
"""Response model for listing feeds."""
|
||||
|
||||
results: List[NewsFeed] = Field(description="List of feeds")
|
||||
starred_count: int = Field(0, description="Number of starred items")
|
||||
newest_item_id: int | None = Field(None, description="ID of newest item")
|
||||
total_count: int = Field(description="Total number of feeds")
|
||||
|
||||
|
||||
class ListItemsResponse(BaseResponse):
|
||||
"""Response model for listing items."""
|
||||
|
||||
results: List[NewsItemSummary] = Field(description="List of items")
|
||||
total_count: int = Field(description="Number of items returned")
|
||||
has_more: bool = Field(False, description="Whether more items exist")
|
||||
oldest_id: int | None = Field(None, description="Oldest item ID (for pagination)")
|
||||
|
||||
|
||||
class GetItemResponse(BaseResponse):
|
||||
"""Response model for getting a single item."""
|
||||
|
||||
item: NewsItem = Field(description="Full item details")
|
||||
|
||||
|
||||
class FeedHealthResponse(BaseResponse):
|
||||
"""Response model for feed health status."""
|
||||
|
||||
feed_id: int = Field(description="Feed ID")
|
||||
title: str = Field(description="Feed title")
|
||||
url: str = Field(description="Feed URL")
|
||||
has_errors: bool = Field(description="Whether feed has update errors")
|
||||
error_count: int = Field(description="Number of consecutive errors")
|
||||
last_error: str | None = Field(None, description="Last error message")
|
||||
|
||||
|
||||
class GetStatusResponse(BaseResponse):
|
||||
"""Response model for app status."""
|
||||
|
||||
version: str = Field(description="News app version")
|
||||
warnings: dict = Field(default_factory=dict, description="Configuration warnings")
|
||||
@@ -10,7 +10,7 @@ from .base import BaseResponse
|
||||
class SemanticSearchResult(BaseModel):
|
||||
"""Model for semantic search results with additional metadata."""
|
||||
|
||||
id: int = Field(description="Document ID (int for all document types)")
|
||||
id: int = Field(description="Document ID")
|
||||
doc_type: str = Field(
|
||||
description="Document type (note, calendar_event, deck_card, etc.)"
|
||||
)
|
||||
@@ -35,32 +35,6 @@ class SemanticSearchResult(BaseModel):
|
||||
chunk_end_offset: Optional[int] = Field(
|
||||
default=None, description="Character position where chunk ends in document"
|
||||
)
|
||||
page_number: Optional[int] = Field(
|
||||
default=None, description="Page number for PDF documents"
|
||||
)
|
||||
page_count: Optional[int] = Field(
|
||||
default=None, description="Total number of pages in PDF document"
|
||||
)
|
||||
# Context expansion fields (optional, populated when include_context=True)
|
||||
has_context_expansion: bool = Field(
|
||||
default=False, description="Whether context expansion was performed"
|
||||
)
|
||||
marked_text: Optional[str] = Field(
|
||||
default=None,
|
||||
description="Full text with position markers around matched chunk",
|
||||
)
|
||||
before_context: Optional[str] = Field(
|
||||
default=None, description="Text before the matched chunk"
|
||||
)
|
||||
after_context: Optional[str] = Field(
|
||||
default=None, description="Text after the matched chunk"
|
||||
)
|
||||
has_before_truncation: Optional[bool] = Field(
|
||||
default=None, description="Whether before_context was truncated"
|
||||
)
|
||||
has_after_truncation: Optional[bool] = Field(
|
||||
default=None, description="Whether after_context was truncated"
|
||||
)
|
||||
|
||||
|
||||
class SemanticSearchResponse(BaseResponse):
|
||||
|
||||
@@ -37,7 +37,7 @@ class HealthCheckFilter(logging.Filter):
|
||||
"""
|
||||
# Check if the log message contains health check endpoints
|
||||
message = record.getMessage()
|
||||
health_check = any(
|
||||
return not any(
|
||||
endpoint in message
|
||||
for endpoint in [
|
||||
"/health/live",
|
||||
@@ -47,8 +47,6 @@ class HealthCheckFilter(logging.Filter):
|
||||
]
|
||||
)
|
||||
|
||||
return not health_check
|
||||
|
||||
|
||||
class TraceContextFormatter(JsonFormatter):
|
||||
"""
|
||||
@@ -60,7 +58,7 @@ class TraceContextFormatter(JsonFormatter):
|
||||
|
||||
def add_fields(
|
||||
self,
|
||||
log_data: dict[str, Any],
|
||||
log_record: dict[str, Any],
|
||||
record: logging.LogRecord,
|
||||
message_dict: dict[str, Any],
|
||||
) -> None:
|
||||
@@ -68,28 +66,28 @@ class TraceContextFormatter(JsonFormatter):
|
||||
Add custom fields to the log record, including trace context.
|
||||
|
||||
Args:
|
||||
log_data: Dictionary to be serialized as JSON
|
||||
log_record: Dictionary to be serialized as JSON
|
||||
record: LogRecord instance
|
||||
message_dict: Dictionary of extra fields from log call
|
||||
"""
|
||||
# Call parent to add standard fields
|
||||
super().add_fields(log_data, record, message_dict)
|
||||
super().add_fields(log_record, record, message_dict)
|
||||
|
||||
# Add trace context if available
|
||||
trace_context = get_trace_context()
|
||||
if trace_context:
|
||||
log_data["trace_id"] = trace_context.get("trace_id")
|
||||
log_data["span_id"] = trace_context.get("span_id")
|
||||
log_record["trace_id"] = trace_context.get("trace_id")
|
||||
log_record["span_id"] = trace_context.get("span_id")
|
||||
|
||||
# Add standard fields with consistent naming
|
||||
log_data["timestamp"] = self.formatTime(record)
|
||||
log_data["level"] = record.levelname
|
||||
log_data["logger"] = record.name
|
||||
log_data["message"] = record.getMessage()
|
||||
log_record["timestamp"] = self.formatTime(record)
|
||||
log_record["level"] = record.levelname
|
||||
log_record["logger"] = record.name
|
||||
log_record["message"] = record.getMessage()
|
||||
|
||||
# Include exception info if present
|
||||
if record.exc_info:
|
||||
log_data["exception"] = self.formatException(record.exc_info)
|
||||
log_record["exception"] = self.formatException(record.exc_info)
|
||||
|
||||
|
||||
class TraceContextTextFormatter(logging.Formatter):
|
||||
|
||||
@@ -53,11 +53,10 @@ def setup_tracing(
|
||||
global _tracer
|
||||
|
||||
# Create resource with service name
|
||||
pkg_name = __package__.split(".")[0] if __package__ else "nextcloud_mcp_server"
|
||||
resource = Resource.create(
|
||||
{
|
||||
"service.name": service_name,
|
||||
"service.version": version(pkg_name),
|
||||
"service.version": version(__package__.split(".")[0]),
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@@ -4,14 +4,12 @@ from .anthropic import AnthropicProvider
|
||||
from .base import Provider
|
||||
from .bedrock import BedrockProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .openai import OpenAIProvider
|
||||
from .registry import get_provider, reset_provider
|
||||
from .simple import SimpleProvider
|
||||
|
||||
__all__ = [
|
||||
"Provider",
|
||||
"OllamaProvider",
|
||||
"OpenAIProvider",
|
||||
"AnthropicProvider",
|
||||
"SimpleProvider",
|
||||
"BedrockProvider",
|
||||
|
||||
@@ -17,20 +17,18 @@ class AnthropicProvider(Provider):
|
||||
Note: Anthropic doesn't provide embedding models, only text generation.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, api_key: str, generation_model: str = "claude-3-5-sonnet-20241022"
|
||||
):
|
||||
def __init__(self, api_key: str, model: str = "claude-3-5-sonnet-20241022"):
|
||||
"""
|
||||
Initialize Anthropic provider.
|
||||
|
||||
Args:
|
||||
api_key: Anthropic API key
|
||||
generation_model: Model name (e.g., "claude-3-5-sonnet-20241022")
|
||||
model: Model name (e.g., "claude-3-5-sonnet-20241022")
|
||||
"""
|
||||
self.client = AsyncAnthropic(api_key=api_key)
|
||||
self.model = generation_model
|
||||
self.model = model
|
||||
|
||||
logger.info(f"Initialized Anthropic provider (model={self.model})")
|
||||
logger.info(f"Initialized Anthropic provider (model={model})")
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
|
||||
@@ -92,21 +92,14 @@ class OllamaProvider(Provider):
|
||||
response.raise_for_status()
|
||||
return response.json()["embedding"]
|
||||
|
||||
async def embed_batch(
|
||||
self, texts: list[str], batch_size: int = 32
|
||||
) -> list[list[float]]:
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts using Ollama's batch API.
|
||||
Generate embeddings for multiple texts (batched requests).
|
||||
|
||||
Uses /api/embed endpoint with array input for efficient batch processing.
|
||||
Conservative batch size (32) prevents quality degradation observed in
|
||||
Ollama issue #6262 with larger batches.
|
||||
|
||||
Note: Ollama processes batches serially, not in parallel.
|
||||
Note: Ollama doesn't have native batch API, so we send requests sequentially.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
batch_size: Maximum texts per batch (default: 32)
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
@@ -119,17 +112,11 @@ class OllamaProvider(Provider):
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
all_embeddings = []
|
||||
for i in range(0, len(texts), batch_size):
|
||||
batch = texts[i : i + batch_size]
|
||||
response = await self.client.post(
|
||||
f"{self.base_url}/api/embed",
|
||||
json={"model": self.embedding_model, "input": batch},
|
||||
)
|
||||
response.raise_for_status()
|
||||
all_embeddings.extend(response.json()["embeddings"])
|
||||
|
||||
return all_embeddings
|
||||
embeddings = []
|
||||
for text in texts:
|
||||
embedding = await self.embed(text)
|
||||
embeddings.append(embedding)
|
||||
return embeddings
|
||||
|
||||
async def _detect_dimension(self):
|
||||
"""
|
||||
|
||||
@@ -1,271 +0,0 @@
|
||||
"""Unified OpenAI provider for embeddings and text generation.
|
||||
|
||||
Supports:
|
||||
- OpenAI's standard API
|
||||
- GitHub Models API (models.github.ai)
|
||||
- Any OpenAI-compatible API via base_url override
|
||||
"""
|
||||
|
||||
import logging
|
||||
from functools import wraps
|
||||
|
||||
import anyio
|
||||
from openai import AsyncOpenAI, RateLimitError
|
||||
|
||||
from .base import Provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Rate limit retry configuration
|
||||
MAX_RETRIES = 5
|
||||
INITIAL_RETRY_DELAY = 2.0 # seconds
|
||||
MAX_RETRY_DELAY = 60.0 # seconds
|
||||
|
||||
|
||||
def retry_on_rate_limit(func):
|
||||
"""Decorator to retry on OpenAI rate limit errors with exponential backoff."""
|
||||
|
||||
@wraps(func)
|
||||
async def wrapper(*args, **kwargs):
|
||||
retry_delay = INITIAL_RETRY_DELAY
|
||||
last_error: Exception | None = None
|
||||
|
||||
for attempt in range(1, MAX_RETRIES + 1):
|
||||
try:
|
||||
return await func(*args, **kwargs)
|
||||
except RateLimitError as e:
|
||||
last_error = e
|
||||
if attempt < MAX_RETRIES:
|
||||
logger.warning(
|
||||
f"Rate limit hit (attempt {attempt}/{MAX_RETRIES}), "
|
||||
f"retrying in {retry_delay:.1f}s..."
|
||||
)
|
||||
await anyio.sleep(retry_delay)
|
||||
retry_delay = min(retry_delay * 2, MAX_RETRY_DELAY)
|
||||
|
||||
logger.error(f"Rate limit exceeded after {MAX_RETRIES} attempts")
|
||||
raise last_error # type: ignore[misc]
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
# Well-known embedding dimensions for OpenAI models
|
||||
OPENAI_EMBEDDING_DIMENSIONS: dict[str, int] = {
|
||||
"text-embedding-3-small": 1536,
|
||||
"text-embedding-3-large": 3072,
|
||||
"text-embedding-ada-002": 1536,
|
||||
# GitHub Models API uses openai/ prefix
|
||||
"openai/text-embedding-3-small": 1536,
|
||||
"openai/text-embedding-3-large": 3072,
|
||||
}
|
||||
|
||||
|
||||
class OpenAIProvider(Provider):
|
||||
"""
|
||||
OpenAI provider supporting both embeddings and text generation.
|
||||
|
||||
Works with:
|
||||
- OpenAI's standard API (api.openai.com)
|
||||
- GitHub Models API (models.github.ai)
|
||||
- Any OpenAI-compatible API (via base_url)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
base_url: str | None = None,
|
||||
embedding_model: str | None = None,
|
||||
generation_model: str | None = None,
|
||||
timeout: float = 120.0,
|
||||
):
|
||||
"""
|
||||
Initialize OpenAI provider.
|
||||
|
||||
Args:
|
||||
api_key: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
|
||||
base_url: Base URL override (e.g., "https://models.github.ai/inference")
|
||||
embedding_model: Model for embeddings (e.g., "text-embedding-3-small").
|
||||
None disables embeddings.
|
||||
generation_model: Model for text generation (e.g., "gpt-4o-mini").
|
||||
None disables generation.
|
||||
timeout: HTTP timeout in seconds (default: 120)
|
||||
"""
|
||||
self.embedding_model = embedding_model
|
||||
self.generation_model = generation_model
|
||||
self._dimension: int | None = None
|
||||
|
||||
# Initialize async client
|
||||
self.client = AsyncOpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
timeout=timeout,
|
||||
)
|
||||
|
||||
# Try to get known dimension without API call
|
||||
if embedding_model and embedding_model in OPENAI_EMBEDDING_DIMENSIONS:
|
||||
self._dimension = OPENAI_EMBEDDING_DIMENSIONS[embedding_model]
|
||||
|
||||
logger.info(
|
||||
f"Initialized OpenAI provider: base_url={base_url or 'default'} "
|
||||
f"(embedding_model={embedding_model}, generation_model={generation_model}, "
|
||||
f"dimension={self._dimension})"
|
||||
)
|
||||
|
||||
@property
|
||||
def supports_embeddings(self) -> bool:
|
||||
"""Whether this provider supports embedding generation."""
|
||||
return self.embedding_model is not None
|
||||
|
||||
@property
|
||||
def supports_generation(self) -> bool:
|
||||
"""Whether this provider supports text generation."""
|
||||
return self.generation_model is not None
|
||||
|
||||
@retry_on_rate_limit
|
||||
async def embed(self, text: str) -> list[float]:
|
||||
"""
|
||||
Generate embedding vector for text.
|
||||
|
||||
Args:
|
||||
text: Input text to embed
|
||||
|
||||
Returns:
|
||||
Vector embedding as list of floats
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
assert self.embedding_model is not None # Type narrowing
|
||||
response = await self.client.embeddings.create(
|
||||
input=text,
|
||||
model=self.embedding_model,
|
||||
)
|
||||
|
||||
embedding = response.data[0].embedding
|
||||
|
||||
# Update dimension if not set
|
||||
if self._dimension is None:
|
||||
self._dimension = len(embedding)
|
||||
logger.info(
|
||||
f"Detected embedding dimension: {self._dimension} "
|
||||
f"for model {self.embedding_model}"
|
||||
)
|
||||
|
||||
return embedding
|
||||
|
||||
async def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
||||
"""
|
||||
Generate embeddings for multiple texts using OpenAI's batch API.
|
||||
|
||||
OpenAI supports up to 2048 inputs per request.
|
||||
|
||||
Args:
|
||||
texts: List of texts to embed
|
||||
|
||||
Returns:
|
||||
List of vector embeddings
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
if not texts:
|
||||
return []
|
||||
|
||||
# OpenAI supports batches up to 2048, but use smaller batches for safety
|
||||
batch_size = 100
|
||||
all_embeddings: list[list[float]] = []
|
||||
|
||||
for i in range(0, len(texts), batch_size):
|
||||
batch = texts[i : i + batch_size]
|
||||
|
||||
# Use helper method with retry logic for each batch
|
||||
batch_embeddings = await self._embed_batch_request(batch)
|
||||
all_embeddings.extend(batch_embeddings)
|
||||
|
||||
# Update dimension if not set
|
||||
if self._dimension is None and batch_embeddings:
|
||||
self._dimension = len(batch_embeddings[0])
|
||||
logger.info(
|
||||
f"Detected embedding dimension: {self._dimension} "
|
||||
f"for model {self.embedding_model}"
|
||||
)
|
||||
|
||||
return all_embeddings
|
||||
|
||||
@retry_on_rate_limit
|
||||
async def _embed_batch_request(self, batch: list[str]) -> list[list[float]]:
|
||||
"""Make a single batch embedding request with retry logic."""
|
||||
assert self.embedding_model is not None # Type narrowing
|
||||
response = await self.client.embeddings.create(
|
||||
input=batch,
|
||||
model=self.embedding_model,
|
||||
)
|
||||
# Sort by index to maintain order
|
||||
sorted_data = sorted(response.data, key=lambda x: x.index)
|
||||
return [item.embedding for item in sorted_data]
|
||||
|
||||
def get_dimension(self) -> int:
|
||||
"""
|
||||
Get embedding dimension.
|
||||
|
||||
Returns:
|
||||
Vector dimension for the configured embedding model
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If embeddings not enabled (no embedding_model)
|
||||
RuntimeError: If dimension not detected yet (call embed first)
|
||||
"""
|
||||
if not self.supports_embeddings:
|
||||
raise NotImplementedError(
|
||||
"Embedding not supported - no embedding_model configured"
|
||||
)
|
||||
|
||||
if self._dimension is None:
|
||||
raise RuntimeError(
|
||||
f"Embedding dimension not detected yet for model {self.embedding_model}. "
|
||||
"Call embed() first or use a known model."
|
||||
)
|
||||
return self._dimension
|
||||
|
||||
@retry_on_rate_limit
|
||||
async def generate(self, prompt: str, max_tokens: int = 500) -> str:
|
||||
"""
|
||||
Generate text from a prompt.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate from
|
||||
max_tokens: Maximum tokens to generate
|
||||
|
||||
Returns:
|
||||
Generated text
|
||||
|
||||
Raises:
|
||||
NotImplementedError: If generation not enabled (no generation_model)
|
||||
"""
|
||||
if not self.supports_generation:
|
||||
raise NotImplementedError(
|
||||
"Text generation not supported - no generation_model configured"
|
||||
)
|
||||
|
||||
response = await self.client.chat.completions.create(
|
||||
model=self.generation_model,
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
max_tokens=max_tokens,
|
||||
temperature=0.7,
|
||||
)
|
||||
|
||||
return response.choices[0].message.content or ""
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close HTTP client."""
|
||||
await self.client.close()
|
||||
@@ -6,7 +6,6 @@ import os
|
||||
from .base import Provider
|
||||
from .bedrock import BedrockProvider
|
||||
from .ollama import OllamaProvider
|
||||
from .openai import OpenAIProvider
|
||||
from .simple import SimpleProvider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -18,9 +17,8 @@ class ProviderRegistry:
|
||||
|
||||
Checks environment variables in priority order and creates appropriate provider:
|
||||
1. Bedrock (AWS_REGION + BEDROCK_*_MODEL)
|
||||
2. OpenAI (OPENAI_API_KEY)
|
||||
3. Ollama (OLLAMA_BASE_URL)
|
||||
4. Simple (fallback for testing/development)
|
||||
2. Ollama (OLLAMA_BASE_URL)
|
||||
3. Simple (fallback for testing/development)
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
@@ -30,9 +28,8 @@ class ProviderRegistry:
|
||||
|
||||
Priority order:
|
||||
1. Bedrock - if AWS_REGION or BEDROCK_EMBEDDING_MODEL is set
|
||||
2. OpenAI - if OPENAI_API_KEY is set
|
||||
3. Ollama - if OLLAMA_BASE_URL is set
|
||||
4. Simple - fallback for testing/development
|
||||
2. Ollama - if OLLAMA_BASE_URL is set
|
||||
3. Simple - fallback for testing/development
|
||||
|
||||
Returns:
|
||||
Provider instance
|
||||
@@ -45,12 +42,6 @@ class ProviderRegistry:
|
||||
- BEDROCK_EMBEDDING_MODEL: Model ID for embeddings (e.g., "amazon.titan-embed-text-v2:0")
|
||||
- BEDROCK_GENERATION_MODEL: Model ID for text generation (e.g., "anthropic.claude-3-sonnet-20240229-v1:0")
|
||||
|
||||
OpenAI:
|
||||
- OPENAI_API_KEY: OpenAI API key (or GITHUB_TOKEN for GitHub Models)
|
||||
- OPENAI_BASE_URL: Base URL override (e.g., "https://models.github.ai/inference")
|
||||
- OPENAI_EMBEDDING_MODEL: Model for embeddings (default: "text-embedding-3-small")
|
||||
- OPENAI_GENERATION_MODEL: Model for text generation (e.g., "gpt-4o-mini")
|
||||
|
||||
Ollama:
|
||||
- OLLAMA_BASE_URL: Ollama API base URL (e.g., "http://localhost:11434")
|
||||
- OLLAMA_EMBEDDING_MODEL: Model for embeddings (default: "nomic-embed-text")
|
||||
@@ -79,28 +70,7 @@ class ProviderRegistry:
|
||||
aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
|
||||
)
|
||||
|
||||
# 2. Check for OpenAI
|
||||
openai_api_key = os.getenv("OPENAI_API_KEY")
|
||||
if openai_api_key:
|
||||
base_url = os.getenv("OPENAI_BASE_URL")
|
||||
embedding_model = os.getenv(
|
||||
"OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
|
||||
)
|
||||
generation_model = os.getenv("OPENAI_GENERATION_MODEL")
|
||||
|
||||
logger.info(
|
||||
f"Using OpenAI provider: base_url={base_url or 'default'}, "
|
||||
f"embedding_model={embedding_model}, "
|
||||
f"generation_model={generation_model}"
|
||||
)
|
||||
return OpenAIProvider(
|
||||
api_key=openai_api_key,
|
||||
base_url=base_url,
|
||||
embedding_model=embedding_model,
|
||||
generation_model=generation_model,
|
||||
)
|
||||
|
||||
# 3. Check for Ollama (local LLM)
|
||||
# 2. Check for Ollama
|
||||
ollama_url = os.getenv("OLLAMA_BASE_URL")
|
||||
if ollama_url:
|
||||
embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL", "nomic-embed-text")
|
||||
@@ -119,12 +89,12 @@ class ProviderRegistry:
|
||||
verify_ssl=verify_ssl,
|
||||
)
|
||||
|
||||
# 4. Fallback to Simple provider for development/testing
|
||||
# 3. Fallback to Simple provider for development/testing
|
||||
dimension = int(os.getenv("SIMPLE_EMBEDDING_DIMENSION", "384"))
|
||||
logger.warning(
|
||||
"No provider configured (AWS_REGION, OPENAI_API_KEY, OLLAMA_BASE_URL not set). "
|
||||
"No provider configured (AWS_REGION, OLLAMA_BASE_URL not set). "
|
||||
"Using SimpleProvider for testing/development. "
|
||||
"For production, configure Bedrock, OpenAI, or Ollama."
|
||||
"For production, configure Bedrock or Ollama."
|
||||
)
|
||||
return SimpleProvider(dimension=dimension)
|
||||
|
||||
|
||||
@@ -83,7 +83,6 @@ async def get_indexed_doc_types(user_id: str) -> set[str]:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -98,20 +97,17 @@ async def get_indexed_doc_types(user_id: str) -> set[str]:
|
||||
scroll_results, _next_offset = await qdrant_client.scroll(
|
||||
collection_name=collection,
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
get_placeholder_filter(), # Exclude placeholders from doc_type discovery
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
]
|
||||
must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
|
||||
),
|
||||
limit=1000, # Sample size to discover types
|
||||
with_payload=["doc_type"],
|
||||
with_vectors=False, # Don't need vectors for type discovery
|
||||
)
|
||||
|
||||
doc_types: set[str] = {
|
||||
str(point.payload.get("doc_type"))
|
||||
doc_types = {
|
||||
point.payload.get("doc_type")
|
||||
for point in scroll_results
|
||||
if point.payload and point.payload.get("doc_type")
|
||||
if point.payload.get("doc_type")
|
||||
}
|
||||
|
||||
logger.debug(f"Found indexed document types for user {user_id}: {doc_types}")
|
||||
@@ -127,7 +123,7 @@ class SearchResult:
|
||||
"""A single search result with metadata and score.
|
||||
|
||||
Attributes:
|
||||
id: Document ID (int for all document types)
|
||||
id: Document ID
|
||||
doc_type: Document type (note, file, calendar, contact, etc.)
|
||||
title: Document title
|
||||
excerpt: Content excerpt showing match context
|
||||
@@ -137,11 +133,6 @@ class SearchResult:
|
||||
metadata: Additional algorithm-specific metadata
|
||||
chunk_start_offset: Character position where chunk starts (None if not available)
|
||||
chunk_end_offset: Character position where chunk ends (None if not available)
|
||||
page_number: Page number for PDF documents (None for other doc types)
|
||||
page_count: Total number of pages in PDF document (None for other doc types)
|
||||
chunk_index: Zero-based index of this chunk in the document
|
||||
total_chunks: Total number of chunks in the document
|
||||
point_id: Qdrant point ID for batch vector retrieval (None if not from Qdrant)
|
||||
"""
|
||||
|
||||
id: int
|
||||
@@ -152,11 +143,6 @@ class SearchResult:
|
||||
metadata: dict[str, Any] | None = None
|
||||
chunk_start_offset: int | None = None
|
||||
chunk_end_offset: int | None = None
|
||||
page_number: int | None = None
|
||||
page_count: int | None = None
|
||||
chunk_index: int = 0
|
||||
total_chunks: int = 1
|
||||
point_id: str | None = None
|
||||
|
||||
def __post_init__(self):
|
||||
"""Validate score is non-negative.
|
||||
@@ -176,15 +162,8 @@ class SearchAlgorithm(ABC):
|
||||
|
||||
All search algorithms must implement the search() method with consistent
|
||||
interface, allowing them to be used interchangeably.
|
||||
|
||||
Attributes:
|
||||
query_embedding: The query embedding generated during the last search.
|
||||
Available after search() completes for algorithms that use embeddings.
|
||||
Can be reused by callers to avoid redundant embedding generation.
|
||||
"""
|
||||
|
||||
query_embedding: list[float] | None = None
|
||||
|
||||
@abstractmethod
|
||||
async def search(
|
||||
self,
|
||||
|
||||
@@ -9,9 +9,7 @@ from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.embedding import get_bm25_service, get_embedding_service
|
||||
from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
|
||||
from nextcloud_mcp_server.observability.tracing import trace_operation
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -74,9 +72,6 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
Returns unverified results from Qdrant. Access verification should be
|
||||
performed separately at the final output stage using verify_search_results().
|
||||
|
||||
Deduplicates by (doc_id, doc_type, chunk_start_offset, chunk_end_offset)
|
||||
to show multiple chunks from the same document while avoiding duplicate chunks.
|
||||
|
||||
Args:
|
||||
query: Natural language or keyword search query
|
||||
user_id: User ID for filtering
|
||||
@@ -100,19 +95,13 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
)
|
||||
|
||||
# Generate dense embedding for semantic search
|
||||
with trace_operation("search.get_embedding_service"):
|
||||
embedding_service = get_embedding_service()
|
||||
with trace_operation("search.dense_embedding"):
|
||||
dense_embedding = await embedding_service.embed(query)
|
||||
# Store for reuse by callers (e.g., viz_routes PCA visualization)
|
||||
self.query_embedding = dense_embedding
|
||||
embedding_service = get_embedding_service()
|
||||
dense_embedding = await embedding_service.embed(query)
|
||||
logger.debug(f"Generated dense embedding (dimension={len(dense_embedding)})")
|
||||
|
||||
# Generate sparse embedding for BM25 keyword search
|
||||
with trace_operation("search.get_bm25_service"):
|
||||
bm25_service = get_bm25_service()
|
||||
with trace_operation("search.sparse_embedding_bm25"):
|
||||
sparse_embedding = await bm25_service.encode_async(query)
|
||||
bm25_service = get_bm25_service()
|
||||
sparse_embedding = bm25_service.encode(query)
|
||||
logger.debug(
|
||||
f"Generated sparse embedding "
|
||||
f"({len(sparse_embedding['indices'])} non-zero terms)"
|
||||
@@ -120,11 +109,10 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
|
||||
# Build Qdrant filter
|
||||
filter_conditions = [
|
||||
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
|
||||
FieldCondition(
|
||||
key="user_id",
|
||||
match=MatchValue(value=user_id),
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
# Add doc_type filter if specified
|
||||
@@ -139,44 +127,38 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
query_filter = Filter(must=filter_conditions)
|
||||
|
||||
# Execute hybrid search with Qdrant native RRF fusion
|
||||
with trace_operation("search.get_qdrant_client"):
|
||||
qdrant_client = await get_qdrant_client()
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
try:
|
||||
# Use prefetch to run both dense and sparse searches
|
||||
# Qdrant will automatically merge results using RRF
|
||||
with trace_operation(
|
||||
"search.qdrant_query",
|
||||
attributes={"query.limit": limit * 2, "query.fusion": self.fusion_name},
|
||||
):
|
||||
search_response = await qdrant_client.query_points(
|
||||
collection_name=settings.get_collection_name(),
|
||||
prefetch=[
|
||||
# Dense semantic search
|
||||
models.Prefetch(
|
||||
query=dense_embedding,
|
||||
using="dense",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
search_response = await qdrant_client.query_points(
|
||||
collection_name=settings.get_collection_name(),
|
||||
prefetch=[
|
||||
# Dense semantic search
|
||||
models.Prefetch(
|
||||
query=dense_embedding,
|
||||
using="dense",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
),
|
||||
# Sparse BM25 search
|
||||
models.Prefetch(
|
||||
query=models.SparseVector(
|
||||
indices=sparse_embedding["indices"],
|
||||
values=sparse_embedding["values"],
|
||||
),
|
||||
# Sparse BM25 search
|
||||
models.Prefetch(
|
||||
query=models.SparseVector(
|
||||
indices=sparse_embedding["indices"],
|
||||
values=sparse_embedding["values"],
|
||||
),
|
||||
using="sparse",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
),
|
||||
],
|
||||
# Fusion query (RRF or DBSF based on initialization)
|
||||
query=models.FusionQuery(fusion=self.fusion),
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
score_threshold=score_threshold,
|
||||
with_payload=True,
|
||||
with_vectors=False, # Don't return vectors to save bandwidth
|
||||
)
|
||||
using="sparse",
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
filter=query_filter,
|
||||
),
|
||||
],
|
||||
# Fusion query (RRF or DBSF based on initialization)
|
||||
query=models.FusionQuery(fusion=self.fusion),
|
||||
limit=limit * 2, # Get extra for deduplication
|
||||
score_threshold=score_threshold,
|
||||
with_payload=True,
|
||||
with_vectors=False, # Don't return vectors to save bandwidth
|
||||
)
|
||||
record_qdrant_operation("search", "success")
|
||||
except Exception:
|
||||
record_qdrant_operation("search", "error")
|
||||
@@ -194,68 +176,41 @@ class BM25HybridSearchAlgorithm(SearchAlgorithm):
|
||||
f"Top 3 {self.fusion_name.upper()} fusion scores: {top_scores}"
|
||||
)
|
||||
|
||||
# Deduplicate by (doc_id, doc_type, chunk_start, chunk_end)
|
||||
# This allows multiple chunks from same doc, but removes duplicate chunks
|
||||
with trace_operation(
|
||||
"search.deduplicate",
|
||||
attributes={"dedupe.num_points": len(search_response.points)},
|
||||
):
|
||||
seen_chunks = set()
|
||||
results = []
|
||||
# Deduplicate by (doc_id, doc_type) - multiple chunks per document
|
||||
seen_docs = set()
|
||||
results = []
|
||||
|
||||
for result in search_response.points:
|
||||
if result.payload is None:
|
||||
continue
|
||||
# doc_id can be int (notes) or str (files - file paths)
|
||||
doc_id = result.payload["doc_id"]
|
||||
doc_type = result.payload.get("doc_type", "note")
|
||||
chunk_start = result.payload.get("chunk_start_offset")
|
||||
chunk_end = result.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, doc_type, chunk_start, chunk_end)
|
||||
for result in search_response.points:
|
||||
doc_id = int(result.payload["doc_id"])
|
||||
doc_type = result.payload.get("doc_type", "note")
|
||||
doc_key = (doc_id, doc_type)
|
||||
|
||||
# Skip if we've already seen this exact chunk
|
||||
if chunk_key in seen_chunks:
|
||||
continue
|
||||
# Skip if we've already seen this document
|
||||
if doc_key in seen_docs:
|
||||
continue
|
||||
|
||||
seen_chunks.add(chunk_key)
|
||||
seen_docs.add(doc_key)
|
||||
|
||||
# Build metadata dict with common fields
|
||||
metadata = {
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
"search_method": f"bm25_hybrid_{self.fusion_name}",
|
||||
}
|
||||
|
||||
# Add file-specific metadata for PDF viewer
|
||||
if doc_type == "file" and (path := result.payload.get("file_path")):
|
||||
metadata["path"] = path
|
||||
|
||||
# Add deck_card-specific metadata for frontend URL construction
|
||||
if doc_type == "deck_card":
|
||||
if board_id := result.payload.get("board_id"):
|
||||
metadata["board_id"] = board_id
|
||||
|
||||
# Return unverified results (verification happens at output stage)
|
||||
results.append(
|
||||
SearchResult(
|
||||
id=doc_id,
|
||||
doc_type=doc_type,
|
||||
title=result.payload.get("title", "Untitled"),
|
||||
excerpt=result.payload.get("excerpt", ""),
|
||||
score=result.score, # Fusion score (RRF or DBSF)
|
||||
metadata=metadata,
|
||||
chunk_start_offset=result.payload.get("chunk_start_offset"),
|
||||
chunk_end_offset=result.payload.get("chunk_end_offset"),
|
||||
page_number=result.payload.get("page_number"),
|
||||
page_count=result.payload.get("page_count"),
|
||||
chunk_index=result.payload.get("chunk_index", 0),
|
||||
total_chunks=result.payload.get("total_chunks", 1),
|
||||
point_id=str(result.id), # Qdrant point ID for batch retrieval
|
||||
)
|
||||
# Return unverified results (verification happens at output stage)
|
||||
results.append(
|
||||
SearchResult(
|
||||
id=doc_id,
|
||||
doc_type=doc_type,
|
||||
title=result.payload.get("title", "Untitled"),
|
||||
excerpt=result.payload.get("excerpt", ""),
|
||||
score=result.score, # Fusion score (RRF or DBSF)
|
||||
metadata={
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
"search_method": f"bm25_hybrid_{self.fusion_name}",
|
||||
},
|
||||
chunk_start_offset=result.payload.get("chunk_start_offset"),
|
||||
chunk_end_offset=result.payload.get("chunk_end_offset"),
|
||||
)
|
||||
)
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
logger.info(f"Returning {len(results)} unverified results after deduplication")
|
||||
if results:
|
||||
|
||||
@@ -1,748 +0,0 @@
|
||||
"""Context expansion for search results.
|
||||
|
||||
Provides utilities to expand matched chunks with surrounding context and
|
||||
position markers for better visualization and understanding of search results.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
from nextcloud_mcp_server.client import NextcloudClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def _get_chunk_from_qdrant(
|
||||
user_id: str, doc_id: int, doc_type: str, chunk_start: int, chunk_end: int
|
||||
) -> str | None:
|
||||
"""Retrieve full chunk text from Qdrant payload.
|
||||
|
||||
This avoids re-fetching and re-parsing documents by using the cached
|
||||
chunk content already stored in Qdrant.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the document
|
||||
doc_id: Document ID
|
||||
doc_type: Document type (e.g., "note", "file")
|
||||
chunk_start: Character offset where chunk starts
|
||||
chunk_end: Character offset where chunk ends
|
||||
|
||||
Returns:
|
||||
Full chunk text from Qdrant excerpt field, or None if not found
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for the specific chunk
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
|
||||
FieldCondition(
|
||||
key="chunk_start_offset", match=MatchValue(value=chunk_start)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_end_offset", match=MatchValue(value=chunk_end)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["excerpt"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
excerpt = point.payload.get("excerpt")
|
||||
if excerpt:
|
||||
logger.debug(
|
||||
f"Retrieved chunk from Qdrant for {doc_type} {doc_id}: "
|
||||
f"{len(excerpt)} chars"
|
||||
)
|
||||
return str(excerpt)
|
||||
|
||||
logger.debug(
|
||||
f"Chunk not found in Qdrant for {doc_type} {doc_id}, "
|
||||
f"chunk [{chunk_start}:{chunk_end}]. Will fall back to document fetch."
|
||||
)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error querying Qdrant for chunk: {e}. Falling back to document fetch.",
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def _get_chunk_by_index_from_qdrant(
|
||||
user_id: str, doc_id: int, doc_type: str, chunk_index: int
|
||||
) -> str | None:
|
||||
"""Retrieve chunk text by chunk_index from Qdrant payload.
|
||||
|
||||
Used to fetch adjacent chunks for context expansion.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the document
|
||||
doc_id: Document ID
|
||||
doc_type: Document type (e.g., "note", "file")
|
||||
chunk_index: Zero-based chunk index in document
|
||||
|
||||
Returns:
|
||||
Full chunk text from Qdrant excerpt field, or None if not found
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for chunk by index
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=doc_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value=doc_type)),
|
||||
FieldCondition(
|
||||
key="chunk_index", match=MatchValue(value=chunk_index)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["excerpt"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
excerpt = point.payload.get("excerpt")
|
||||
if excerpt:
|
||||
logger.debug(
|
||||
f"Retrieved adjacent chunk {chunk_index} from Qdrant for "
|
||||
f"{doc_type} {doc_id}: {len(excerpt)} chars"
|
||||
)
|
||||
return str(excerpt)
|
||||
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(
|
||||
f"Could not retrieve adjacent chunk {chunk_index} for "
|
||||
f"{doc_type} {doc_id}: {e}"
|
||||
)
|
||||
return None
|
||||
|
||||
|
||||
async def _get_file_path_from_qdrant(
|
||||
user_id: str, file_id: int, chunk_start: int, chunk_end: int
|
||||
) -> str | None:
|
||||
"""Resolve file_id to file_path by querying Qdrant payload.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the file
|
||||
file_id: Numeric file ID
|
||||
chunk_start: Character offset where chunk starts
|
||||
chunk_end: Character offset where chunk ends
|
||||
|
||||
Returns:
|
||||
File path string, or None if not found in Qdrant
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for the specific chunk
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=file_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="file")),
|
||||
FieldCondition(
|
||||
key="chunk_start_offset", match=MatchValue(value=chunk_start)
|
||||
),
|
||||
FieldCondition(
|
||||
key="chunk_end_offset", match=MatchValue(value=chunk_end)
|
||||
),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["file_path"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
file_path = point.payload.get("file_path")
|
||||
if file_path:
|
||||
logger.debug(f"Resolved file_id {file_id} to file_path {file_path}")
|
||||
return str(file_path)
|
||||
|
||||
logger.warning(
|
||||
f"Could not find file_path in Qdrant for file_id {file_id}, "
|
||||
f"chunk [{chunk_start}:{chunk_end}]"
|
||||
)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error querying Qdrant for file_path: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
async def _get_deck_metadata_from_qdrant(
|
||||
user_id: str, card_id: int
|
||||
) -> dict[str, int] | None:
|
||||
"""Retrieve board_id and stack_id for a deck card from Qdrant payload.
|
||||
|
||||
Args:
|
||||
user_id: User ID who owns the card
|
||||
card_id: Card ID
|
||||
|
||||
Returns:
|
||||
Dictionary with board_id and stack_id, or None if not found
|
||||
"""
|
||||
try:
|
||||
from qdrant_client.models import FieldCondition, Filter, MatchValue
|
||||
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
qdrant_client = await get_qdrant_client()
|
||||
settings = get_settings()
|
||||
|
||||
# Query for any chunk of this card (we just need metadata)
|
||||
scroll_result = await qdrant_client.scroll(
|
||||
collection_name=settings.get_collection_name(),
|
||||
scroll_filter=Filter(
|
||||
must=[
|
||||
FieldCondition(key="user_id", match=MatchValue(value=user_id)),
|
||||
FieldCondition(key="doc_id", match=MatchValue(value=card_id)),
|
||||
FieldCondition(key="doc_type", match=MatchValue(value="deck_card")),
|
||||
]
|
||||
),
|
||||
limit=1,
|
||||
with_payload=["board_id", "stack_id"],
|
||||
with_vectors=False,
|
||||
)
|
||||
|
||||
if scroll_result[0]:
|
||||
point = scroll_result[0][0]
|
||||
board_id = point.payload.get("board_id")
|
||||
stack_id = point.payload.get("stack_id")
|
||||
if board_id is not None and stack_id is not None:
|
||||
logger.debug(
|
||||
f"Retrieved deck metadata for card {card_id}: "
|
||||
f"board_id={board_id}, stack_id={stack_id}"
|
||||
)
|
||||
return {"board_id": int(board_id), "stack_id": int(stack_id)}
|
||||
|
||||
logger.debug(
|
||||
f"Could not find deck metadata in Qdrant for card {card_id} "
|
||||
f"(might be legacy data without board_id/stack_id)"
|
||||
)
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
logger.debug(f"Error querying Qdrant for deck metadata: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ChunkContext:
|
||||
"""Expanded chunk with surrounding context and position markers.
|
||||
|
||||
Attributes:
|
||||
chunk_text: The matched chunk text
|
||||
before_context: Text before the chunk (up to context_chars)
|
||||
after_context: Text after the chunk (up to context_chars)
|
||||
chunk_start_offset: Character position where chunk starts in document
|
||||
chunk_end_offset: Character position where chunk ends in document
|
||||
page_number: Page number for PDFs (None for other doc types)
|
||||
chunk_index: Zero-based chunk index (N in "chunk N of M")
|
||||
total_chunks: Total number of chunks in document
|
||||
marked_text: Full text with position markers around the chunk
|
||||
has_before_truncation: True if before_context was truncated
|
||||
has_after_truncation: True if after_context was truncated
|
||||
"""
|
||||
|
||||
chunk_text: str
|
||||
before_context: str
|
||||
after_context: str
|
||||
chunk_start_offset: int
|
||||
chunk_end_offset: int
|
||||
page_number: int | None
|
||||
chunk_index: int
|
||||
total_chunks: int
|
||||
marked_text: str
|
||||
has_before_truncation: bool
|
||||
has_after_truncation: bool
|
||||
|
||||
|
||||
async def get_chunk_with_context(
|
||||
nc_client: NextcloudClient,
|
||||
user_id: str,
|
||||
doc_id: str | int,
|
||||
doc_type: str,
|
||||
chunk_start: int,
|
||||
chunk_end: int,
|
||||
page_number: int | None = None,
|
||||
chunk_index: int = 0,
|
||||
total_chunks: int = 1,
|
||||
context_chars: int = 300,
|
||||
) -> ChunkContext | None:
|
||||
"""Fetch chunk with surrounding context.
|
||||
|
||||
First tries to retrieve the chunk from Qdrant (fast, cached). If that fails
|
||||
(e.g., legacy data with truncated excerpts), falls back to fetching and
|
||||
parsing the full document (slower, for PDFs especially).
|
||||
|
||||
Args:
|
||||
nc_client: Authenticated Nextcloud client
|
||||
user_id: User ID who owns the document
|
||||
doc_id: Document ID (int for notes/files)
|
||||
doc_type: Type of document ("note", "file", etc.)
|
||||
chunk_start: Character offset where chunk starts
|
||||
chunk_end: Character offset where chunk ends
|
||||
page_number: Optional page number for PDFs
|
||||
chunk_index: Zero-based chunk index in document
|
||||
total_chunks: Total number of chunks in document
|
||||
context_chars: Number of characters to include before/after chunk
|
||||
|
||||
Returns:
|
||||
ChunkContext with expanded context and markers, or None if document
|
||||
cannot be retrieved
|
||||
"""
|
||||
# Convert doc_id to int for Qdrant query
|
||||
doc_id_int = (
|
||||
int(doc_id)
|
||||
if isinstance(doc_id, str) and doc_id.isdigit()
|
||||
else (doc_id if isinstance(doc_id, int) else None)
|
||||
)
|
||||
|
||||
# Try to get chunk from Qdrant first (fast path)
|
||||
if doc_id_int is not None:
|
||||
chunk_text = await _get_chunk_from_qdrant(
|
||||
user_id, doc_id_int, doc_type, chunk_start, chunk_end
|
||||
)
|
||||
if chunk_text:
|
||||
logger.info(
|
||||
f"Retrieved chunk from Qdrant cache for {doc_type} {doc_id} "
|
||||
f"(avoids document re-fetch/re-parse)"
|
||||
)
|
||||
|
||||
# Fetch adjacent chunks for context expansion
|
||||
# Get chunk overlap from config to remove duplicate text
|
||||
from nextcloud_mcp_server.config import get_settings
|
||||
|
||||
settings = get_settings()
|
||||
chunk_overlap = settings.document_chunk_overlap
|
||||
|
||||
before_context = ""
|
||||
after_context = ""
|
||||
has_before_truncation = False
|
||||
has_after_truncation = False
|
||||
|
||||
# Fetch previous chunk if not first chunk
|
||||
if chunk_index > 0:
|
||||
before_chunk = await _get_chunk_by_index_from_qdrant(
|
||||
user_id, doc_id_int, doc_type, chunk_index - 1
|
||||
)
|
||||
if before_chunk:
|
||||
# Remove overlap: the last chunk_overlap chars of previous chunk
|
||||
# overlap with the first chunk_overlap chars of current chunk
|
||||
before_context = (
|
||||
before_chunk[:-chunk_overlap]
|
||||
if len(before_chunk) > chunk_overlap
|
||||
else ""
|
||||
)
|
||||
# Truncate if requested context_chars < remaining length
|
||||
if before_context and len(before_context) > context_chars:
|
||||
before_context = before_context[-context_chars:]
|
||||
has_before_truncation = True
|
||||
else:
|
||||
# Could not fetch previous chunk, but we're not at start
|
||||
has_before_truncation = True
|
||||
|
||||
# Fetch next chunk if not last chunk
|
||||
if chunk_index < total_chunks - 1:
|
||||
after_chunk = await _get_chunk_by_index_from_qdrant(
|
||||
user_id, doc_id_int, doc_type, chunk_index + 1
|
||||
)
|
||||
if after_chunk:
|
||||
# Remove overlap: the first chunk_overlap chars of next chunk
|
||||
# overlap with the last chunk_overlap chars of current chunk
|
||||
after_context = (
|
||||
after_chunk[chunk_overlap:]
|
||||
if len(after_chunk) > chunk_overlap
|
||||
else ""
|
||||
)
|
||||
# Truncate if requested context_chars < remaining length
|
||||
if after_context and len(after_context) > context_chars:
|
||||
after_context = after_context[:context_chars]
|
||||
has_after_truncation = True
|
||||
else:
|
||||
# Could not fetch next chunk, but we're not at end
|
||||
has_after_truncation = True
|
||||
|
||||
marked_text = _insert_position_markers(
|
||||
before_context=before_context,
|
||||
chunk_text=chunk_text,
|
||||
after_context=after_context,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
return ChunkContext(
|
||||
chunk_text=chunk_text,
|
||||
before_context=before_context,
|
||||
after_context=after_context,
|
||||
chunk_start_offset=chunk_start,
|
||||
chunk_end_offset=chunk_end,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
marked_text=marked_text,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
|
||||
# Fallback: Fetch full document and extract chunk with context
|
||||
# This path is taken for:
|
||||
# 1. Legacy data with truncated excerpts in Qdrant
|
||||
# 2. Failed Qdrant queries
|
||||
logger.info(
|
||||
f"Falling back to document fetch for {doc_type} {doc_id} "
|
||||
f"(Qdrant cache miss, possibly legacy data)"
|
||||
)
|
||||
|
||||
# For files, retrieve file_path from Qdrant payload
|
||||
resolved_doc_id = doc_id
|
||||
if doc_type == "file" and isinstance(doc_id, int):
|
||||
file_path = await _get_file_path_from_qdrant(
|
||||
user_id, doc_id, chunk_start, chunk_end
|
||||
)
|
||||
if not file_path:
|
||||
logger.warning(
|
||||
f"Could not resolve file_id {doc_id} to file_path from Qdrant"
|
||||
)
|
||||
return None
|
||||
resolved_doc_id = file_path
|
||||
logger.debug(f"Resolved file_id {doc_id} to file_path {file_path}")
|
||||
|
||||
# Fetch full document text
|
||||
full_text = await _fetch_document_text(
|
||||
nc_client, resolved_doc_id, doc_type, user_id
|
||||
)
|
||||
if full_text is None:
|
||||
logger.warning(
|
||||
f"Could not fetch document text for {doc_type} {doc_id}, "
|
||||
"skipping context expansion"
|
||||
)
|
||||
return None
|
||||
|
||||
# Validate offsets
|
||||
if chunk_start < 0 or chunk_end > len(full_text) or chunk_start >= chunk_end:
|
||||
logger.warning(
|
||||
f"Invalid chunk offsets for {doc_type} {doc_id}: "
|
||||
f"start={chunk_start}, end={chunk_end}, doc_len={len(full_text)}"
|
||||
)
|
||||
return None
|
||||
|
||||
# Extract chunk text
|
||||
chunk_text = full_text[chunk_start:chunk_end]
|
||||
|
||||
# Calculate context boundaries
|
||||
context_start = max(0, chunk_start - context_chars)
|
||||
context_end = min(len(full_text), chunk_end + context_chars)
|
||||
|
||||
# Extract context
|
||||
before_context = full_text[context_start:chunk_start]
|
||||
after_context = full_text[chunk_end:context_end]
|
||||
|
||||
# Check for truncation
|
||||
has_before_truncation = context_start > 0
|
||||
has_after_truncation = context_end < len(full_text)
|
||||
|
||||
# Create marked text with position markers
|
||||
marked_text = _insert_position_markers(
|
||||
before_context=before_context,
|
||||
chunk_text=chunk_text,
|
||||
after_context=after_context,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
|
||||
return ChunkContext(
|
||||
chunk_text=chunk_text,
|
||||
before_context=before_context,
|
||||
after_context=after_context,
|
||||
chunk_start_offset=chunk_start,
|
||||
chunk_end_offset=chunk_end,
|
||||
page_number=page_number,
|
||||
chunk_index=chunk_index,
|
||||
total_chunks=total_chunks,
|
||||
marked_text=marked_text,
|
||||
has_before_truncation=has_before_truncation,
|
||||
has_after_truncation=has_after_truncation,
|
||||
)
|
||||
|
||||
|
||||
async def _fetch_document_text(
|
||||
nc_client: NextcloudClient, doc_id: str | int, doc_type: str, user_id: str
|
||||
) -> str | None:
|
||||
"""Fetch full text content of a document.
|
||||
|
||||
Args:
|
||||
nc_client: Authenticated Nextcloud client
|
||||
doc_id: Document ID (note ID or file path)
|
||||
doc_type: Type of document ("note", "file", etc.)
|
||||
|
||||
Returns:
|
||||
Full document text, or None if document cannot be retrieved
|
||||
"""
|
||||
try:
|
||||
if doc_type == "note":
|
||||
# Fetch note by ID
|
||||
note = await nc_client.notes.get_note(note_id=int(doc_id))
|
||||
# Reconstruct full content as indexed: title + "\n\n" + content
|
||||
# This ensures chunk offsets align with indexed content structure
|
||||
title = note.get("title", "")
|
||||
content = note.get("content", "")
|
||||
return f"{title}\n\n{content}"
|
||||
elif doc_type == "file":
|
||||
# Fetch file content via WebDAV
|
||||
try:
|
||||
file_path = str(doc_id)
|
||||
file_content, content_type = await nc_client.webdav.read_file(file_path)
|
||||
|
||||
# Check if it's a PDF (by content type or file extension)
|
||||
is_pdf = (
|
||||
content_type and "pdf" in content_type.lower()
|
||||
) or file_path.lower().endswith(".pdf")
|
||||
|
||||
if is_pdf:
|
||||
# Extract text from PDF using PyMuPDF
|
||||
# IMPORTANT: Use pymupdf4llm.to_markdown() to match indexing extraction
|
||||
# This ensures character offsets align between indexed chunks and retrieval
|
||||
import pymupdf
|
||||
import pymupdf4llm
|
||||
|
||||
logger.debug(f"Extracting text from PDF: {file_path}")
|
||||
pdf_doc = pymupdf.open(stream=file_content, filetype="pdf")
|
||||
text_parts = []
|
||||
|
||||
# Extract each page as markdown (same as indexing)
|
||||
for page_num in range(pdf_doc.page_count):
|
||||
page_md = pymupdf4llm.to_markdown(
|
||||
pdf_doc,
|
||||
pages=[page_num],
|
||||
write_images=False, # Don't need images for context
|
||||
page_chunks=False,
|
||||
)
|
||||
text_parts.append(page_md)
|
||||
|
||||
pdf_doc.close()
|
||||
|
||||
# Join pages (no separator - matches indexing)
|
||||
full_text = "".join(text_parts)
|
||||
logger.debug(
|
||||
f"Extracted {len(full_text)} characters from "
|
||||
f"{pdf_doc.page_count} pages in {file_path}"
|
||||
)
|
||||
return full_text
|
||||
else:
|
||||
# Assume it's a text file, decode to string
|
||||
logger.debug(f"Decoding text file: {file_path}")
|
||||
return file_content.decode("utf-8", errors="replace")
|
||||
except Exception as e:
|
||||
logger.error(
|
||||
f"Error fetching file content for {doc_id}: {e}", exc_info=True
|
||||
)
|
||||
return None
|
||||
elif doc_type == "news_item":
|
||||
# Fetch news item by ID
|
||||
from nextcloud_mcp_server.vector.html_processor import html_to_markdown
|
||||
|
||||
item = await nc_client.news.get_item(int(doc_id))
|
||||
# Reconstruct full content as indexed: title + source + URL + body
|
||||
# This ensures chunk offsets align with indexed content structure
|
||||
body_markdown = html_to_markdown(item.get("body", ""))
|
||||
item_title = item.get("title", "")
|
||||
item_url = item.get("url", "")
|
||||
feed_title = item.get("feedTitle", "")
|
||||
|
||||
content_parts = [item_title]
|
||||
if feed_title:
|
||||
content_parts.append(f"Source: {feed_title}")
|
||||
if item_url:
|
||||
content_parts.append(f"URL: {item_url}")
|
||||
content_parts.append("") # Blank line
|
||||
content_parts.append(body_markdown)
|
||||
return "\n".join(content_parts)
|
||||
elif doc_type == "deck_card":
|
||||
# Fetch card from Deck API
|
||||
# Try to get board_id/stack_id from Qdrant metadata (O(1) lookup)
|
||||
# Otherwise fall back to iteration (legacy data)
|
||||
card = None
|
||||
deck_metadata = await _get_deck_metadata_from_qdrant(user_id, int(doc_id))
|
||||
|
||||
if deck_metadata:
|
||||
# Fast path: Direct lookup with known board_id/stack_id
|
||||
board_id = deck_metadata["board_id"]
|
||||
stack_id = deck_metadata["stack_id"]
|
||||
try:
|
||||
card = await nc_client.deck.get_card(
|
||||
board_id=board_id, stack_id=stack_id, card_id=int(doc_id)
|
||||
)
|
||||
logger.debug(
|
||||
f"Retrieved deck card {doc_id} using metadata "
|
||||
f"(board_id={board_id}, stack_id={stack_id})"
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to fetch card with metadata (board_id={board_id}, "
|
||||
f"stack_id={stack_id}, card_id={doc_id}): {e}, falling back to iteration"
|
||||
)
|
||||
|
||||
# Fallback: Iterate through all boards/stacks (for legacy data or if fast path failed)
|
||||
if card is None:
|
||||
boards = await nc_client.deck.get_boards()
|
||||
card_found = False
|
||||
|
||||
for board in boards:
|
||||
if card_found:
|
||||
break
|
||||
|
||||
# Skip deleted boards (soft delete: deletedAt > 0)
|
||||
if board.deletedAt > 0:
|
||||
logger.debug(
|
||||
f"Skipping deleted board {board.id} while searching for card {doc_id}"
|
||||
)
|
||||
continue
|
||||
|
||||
stacks = await nc_client.deck.get_stacks(board.id)
|
||||
|
||||
for stack in stacks:
|
||||
if card_found:
|
||||
break
|
||||
if stack.cards:
|
||||
for c in stack.cards:
|
||||
if c.id == int(doc_id):
|
||||
card = c
|
||||
card_found = True
|
||||
logger.debug(
|
||||
f"Found deck card {doc_id} in board {board.id}, "
|
||||
f"stack {stack.id} (fallback iteration)"
|
||||
)
|
||||
break
|
||||
|
||||
if not card_found:
|
||||
logger.warning(f"Deck card {doc_id} not found in any board/stack")
|
||||
return None
|
||||
|
||||
# Type narrowing: card is set if we reach here
|
||||
assert card is not None
|
||||
|
||||
# Reconstruct full content as indexed: title + "\n\n" + description
|
||||
# This ensures chunk offsets align with indexed content structure
|
||||
content_parts = [card.title]
|
||||
if card.description:
|
||||
content_parts.append(card.description)
|
||||
return "\n\n".join(content_parts)
|
||||
else:
|
||||
logger.warning(f"Unsupported doc_type for context expansion: {doc_type}")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching document {doc_type} {doc_id}: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
|
||||
def _insert_position_markers(
|
||||
before_context: str,
|
||||
chunk_text: str,
|
||||
after_context: str,
|
||||
page_number: int | None,
|
||||
chunk_index: int,
|
||||
total_chunks: int,
|
||||
has_before_truncation: bool,
|
||||
has_after_truncation: bool,
|
||||
) -> str:
|
||||
"""Insert position markers around matched chunk.
|
||||
|
||||
Creates markdown-formatted text with visual markers indicating chunk
|
||||
boundaries and metadata.
|
||||
|
||||
Args:
|
||||
before_context: Text before chunk
|
||||
chunk_text: The matched chunk
|
||||
after_context: Text after chunk
|
||||
page_number: Optional page number
|
||||
chunk_index: Zero-based chunk index
|
||||
total_chunks: Total chunks in document
|
||||
has_before_truncation: Whether before_context is truncated
|
||||
has_after_truncation: Whether after_context is truncated
|
||||
|
||||
Returns:
|
||||
Formatted text with position markers
|
||||
"""
|
||||
# Build position metadata
|
||||
position_parts = []
|
||||
if page_number is not None:
|
||||
position_parts.append(f"Page {page_number}")
|
||||
position_parts.append(f"Chunk {chunk_index + 1} of {total_chunks}")
|
||||
position_metadata = ", ".join(position_parts)
|
||||
|
||||
# Build marked text
|
||||
parts = []
|
||||
|
||||
# Add truncation indicator for before context
|
||||
if has_before_truncation:
|
||||
parts.append("**[...]**\n\n")
|
||||
|
||||
# Add before context if present
|
||||
if before_context:
|
||||
parts.append(before_context)
|
||||
|
||||
# Add chunk start marker
|
||||
parts.append(f"\n\n🔍 **MATCHED CHUNK START** ({position_metadata})\n\n")
|
||||
|
||||
# Add chunk text
|
||||
parts.append(chunk_text)
|
||||
|
||||
# Add chunk end marker
|
||||
parts.append("\n\n🔍 **MATCHED CHUNK END**\n\n")
|
||||
|
||||
# Add after context if present
|
||||
if after_context:
|
||||
parts.append(after_context)
|
||||
|
||||
# Add truncation indicator for after context
|
||||
if has_after_truncation:
|
||||
parts.append("\n\n**[...]**")
|
||||
|
||||
return "".join(parts)
|
||||
@@ -1,907 +0,0 @@
|
||||
"""PDF chunk highlighting utilities for vector visualization.
|
||||
|
||||
This module provides utilities to generate highlighted page images showing
|
||||
matched chunks and their context from semantic search results.
|
||||
|
||||
The highlighting uses character offsets to precisely locate chunks within
|
||||
PDF documents, ensuring accurate highlighting even when text formatting
|
||||
varies between indexing and rendering.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import pymupdf
|
||||
import pymupdf4llm
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PDFHighlighter:
|
||||
"""Generate highlighted page images from PDF chunks."""
|
||||
|
||||
# Color definitions (RGB, 0-1 range)
|
||||
COLORS = {
|
||||
"yellow": [1, 1, 0],
|
||||
"red": [1, 0, 0],
|
||||
"green": [0, 1, 0],
|
||||
"blue": [0, 0, 1],
|
||||
"orange": [1, 0.5, 0],
|
||||
"pink": [1, 0, 1],
|
||||
"gray": [0.7, 0.7, 0.7],
|
||||
"light_blue": [0.7, 0.9, 1.0],
|
||||
"light_green": [0.7, 1.0, 0.7],
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def strip_markdown(text: str) -> str:
|
||||
"""Remove markdown formatting to improve search accuracy.
|
||||
|
||||
Args:
|
||||
text: Text with potential markdown formatting
|
||||
|
||||
Returns:
|
||||
Plain text with markdown removed
|
||||
"""
|
||||
# Remove bold/italic markers
|
||||
text = re.sub(r"\*\*(.+?)\*\*", r"\1", text)
|
||||
text = re.sub(r"\*(.+?)\*", r"\1", text)
|
||||
text = re.sub(r"__(.+?)__", r"\1", text)
|
||||
text = re.sub(r"_(.+?)_", r"\1", text)
|
||||
|
||||
# Remove headers
|
||||
text = re.sub(r"^#+\s+", "", text, flags=re.MULTILINE)
|
||||
|
||||
# Remove inline code
|
||||
text = re.sub(r"`(.+?)`", r"\1", text)
|
||||
|
||||
return text.strip()
|
||||
|
||||
@staticmethod
|
||||
def extract_pdf_text_with_boundaries(
|
||||
pdf_doc: pymupdf.Document,
|
||||
) -> tuple[str, list[dict]]:
|
||||
"""Extract full document text with page boundary tracking.
|
||||
|
||||
Uses pymupdf4llm.to_markdown() for consistency with indexing.
|
||||
|
||||
IMPORTANT: Must use write_images=True to match PyMuPDFProcessor behavior!
|
||||
Even though we don't need the images, we need the image references in the
|
||||
markdown text to maintain consistent character offsets with indexing.
|
||||
|
||||
Args:
|
||||
pdf_doc: Open PyMuPDF document
|
||||
|
||||
Returns:
|
||||
Tuple of (full_text, page_boundaries) where page_boundaries is a list of:
|
||||
{"page": 1, "start_offset": 0, "end_offset": 1234}
|
||||
"""
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
page_boundaries = []
|
||||
text_parts = []
|
||||
current_offset = 0
|
||||
|
||||
# Use temp directory for image output (images are discarded after extraction)
|
||||
temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_"))
|
||||
|
||||
for page_idx in range(pdf_doc.page_count):
|
||||
page_md = pymupdf4llm.to_markdown(
|
||||
pdf_doc,
|
||||
pages=[page_idx],
|
||||
write_images=True, # Must match indexing! Otherwise offsets misalign
|
||||
image_path=temp_dir,
|
||||
page_chunks=False,
|
||||
)
|
||||
|
||||
page_boundaries.append(
|
||||
{
|
||||
"page": page_idx + 1, # 1-indexed
|
||||
"start_offset": current_offset,
|
||||
"end_offset": current_offset + len(page_md),
|
||||
}
|
||||
)
|
||||
|
||||
text_parts.append(page_md)
|
||||
current_offset += len(page_md)
|
||||
|
||||
full_text = "".join(text_parts)
|
||||
|
||||
# Clean up temp directory and extracted images
|
||||
import shutil
|
||||
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clean up temp directory {temp_dir}: {e}")
|
||||
|
||||
return full_text, page_boundaries
|
||||
|
||||
@staticmethod
|
||||
def find_chunk_page(
|
||||
chunk_start_offset: int,
|
||||
chunk_end_offset: int,
|
||||
page_boundaries: list[dict],
|
||||
) -> Optional[dict]:
|
||||
"""Find which page contains the most of a given chunk.
|
||||
|
||||
Args:
|
||||
chunk_start_offset: Chunk start position in full document
|
||||
chunk_end_offset: Chunk end position in full document
|
||||
page_boundaries: Page boundary list from extract_pdf_text_with_boundaries()
|
||||
|
||||
Returns:
|
||||
Dict with keys: page_num, overlap_chars, page_relative_start, page_relative_end
|
||||
or None if chunk not found on any page
|
||||
"""
|
||||
chunk_pages = []
|
||||
|
||||
for boundary in page_boundaries:
|
||||
page_start = boundary["start_offset"]
|
||||
page_end = boundary["end_offset"]
|
||||
|
||||
# Check if chunk overlaps with this page
|
||||
if chunk_start_offset < page_end and chunk_end_offset > page_start:
|
||||
overlap_start = max(chunk_start_offset, page_start)
|
||||
overlap_end = min(chunk_end_offset, page_end)
|
||||
overlap_chars = overlap_end - overlap_start
|
||||
|
||||
chunk_pages.append(
|
||||
{
|
||||
"page_num": boundary["page"],
|
||||
"overlap_chars": overlap_chars,
|
||||
"page_relative_start": overlap_start - page_start,
|
||||
"page_relative_end": overlap_end - page_start,
|
||||
}
|
||||
)
|
||||
|
||||
if not chunk_pages:
|
||||
return None
|
||||
|
||||
# Return page with maximum overlap
|
||||
return max(chunk_pages, key=lambda p: p["overlap_chars"])
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunk_by_word_positions(
|
||||
page: pymupdf.Page,
|
||||
chunk_text: str,
|
||||
color: str = "yellow",
|
||||
search_region: tuple[float, float, float, float] | None = None,
|
||||
) -> int:
|
||||
"""Highlight chunk using word-position matching.
|
||||
|
||||
This method matches words from the chunk to their positions on the PDF page,
|
||||
avoiding text search mismatches between markdown-formatted text and raw PDF text.
|
||||
|
||||
Args:
|
||||
page: PyMuPDF page object
|
||||
chunk_text: Text to highlight (may contain markdown)
|
||||
color: Color name from COLORS dict
|
||||
search_region: Optional (x0, y0, x1, y1) bounding box to constrain search.
|
||||
If provided, only words within this region are considered.
|
||||
|
||||
Returns:
|
||||
Number of highlight rectangles added
|
||||
"""
|
||||
# Tokenize chunk into words (alphanumeric only, lowercase)
|
||||
chunk_words = re.findall(
|
||||
r"\w+", PDFHighlighter.strip_markdown(chunk_text).lower()
|
||||
)
|
||||
|
||||
if not chunk_words:
|
||||
logger.warning("No words found in chunk text")
|
||||
return 0
|
||||
|
||||
# Get all words from page with positions
|
||||
# Format: (x0, y0, x1, y1, "word", block_no, line_no, word_no)
|
||||
try:
|
||||
page_words = page.get_text("words")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract words from page: {e}")
|
||||
return 0
|
||||
|
||||
if not page_words:
|
||||
logger.warning("No words found on page")
|
||||
return 0
|
||||
|
||||
# Filter words by search region if provided
|
||||
if search_region:
|
||||
rx0, ry0, rx1, ry1 = search_region
|
||||
# Allow some tolerance (10 points) for words near region boundary
|
||||
tolerance = 10
|
||||
page_words = [
|
||||
w
|
||||
for w in page_words
|
||||
if (
|
||||
w[0] >= rx0 - tolerance
|
||||
and w[2] <= rx1 + tolerance
|
||||
and w[1] >= ry0 - tolerance
|
||||
and w[3] <= ry1 + tolerance
|
||||
)
|
||||
]
|
||||
logger.debug(
|
||||
f"Filtered to {len(page_words)} words in region "
|
||||
f"({rx0:.0f}, {ry0:.0f}, {rx1:.0f}, {ry1:.0f})"
|
||||
)
|
||||
|
||||
if not page_words:
|
||||
logger.warning("No words found in search region")
|
||||
return 0
|
||||
|
||||
# Find matching word sequence - use FIRST match, not longest
|
||||
# This ensures we highlight the actual chunk location, not similar text elsewhere
|
||||
matches = []
|
||||
|
||||
# Build a simple word-to-positions index for the first few chunk words
|
||||
# to find candidate starting positions
|
||||
first_chunk_word = chunk_words[0] if chunk_words else ""
|
||||
candidate_starts = []
|
||||
|
||||
for i, pw in enumerate(page_words):
|
||||
page_word = pw[4].lower()
|
||||
# Check if this could be the start of the chunk
|
||||
if (
|
||||
first_chunk_word == page_word
|
||||
or first_chunk_word in page_word
|
||||
or page_word in first_chunk_word
|
||||
):
|
||||
candidate_starts.append(i)
|
||||
|
||||
# Try each candidate start position and take the FIRST good match
|
||||
for start_pos in candidate_starts:
|
||||
current_matches = []
|
||||
chunk_idx = 0
|
||||
skip_count = 0
|
||||
max_skips = 3 # Allow some formatting differences
|
||||
|
||||
for page_idx in range(start_pos, len(page_words)):
|
||||
if chunk_idx >= len(chunk_words):
|
||||
break
|
||||
|
||||
page_word = page_words[page_idx][4].lower()
|
||||
chunk_word = chunk_words[chunk_idx]
|
||||
|
||||
# Check for match (allow partial matches for flexibility)
|
||||
if (
|
||||
chunk_word == page_word
|
||||
or chunk_word in page_word
|
||||
or page_word in chunk_word
|
||||
):
|
||||
current_matches.append(page_words[page_idx])
|
||||
chunk_idx += 1
|
||||
skip_count = 0
|
||||
elif skip_count < max_skips:
|
||||
# Allow skipping some words (formatting, punctuation)
|
||||
skip_count += 1
|
||||
continue
|
||||
else:
|
||||
break
|
||||
|
||||
# Accept if we matched at least 50% of chunk words
|
||||
if len(current_matches) >= len(chunk_words) * 0.5:
|
||||
matches = current_matches
|
||||
logger.debug(
|
||||
f"Found match at position {start_pos}: "
|
||||
f"{len(matches)}/{len(chunk_words)} words"
|
||||
)
|
||||
break # Take FIRST match, not best/longest
|
||||
|
||||
if not matches:
|
||||
logger.debug(f"No word matches found (chunk has {len(chunk_words)} words)")
|
||||
return 0
|
||||
|
||||
logger.debug(
|
||||
f"Matched {len(matches)} words out of {len(chunk_words)} chunk words"
|
||||
)
|
||||
|
||||
# Build rectangles from matched words
|
||||
rects = [pymupdf.Rect(w[0], w[1], w[2], w[3]) for w in matches]
|
||||
|
||||
# Check if matches are contiguous (not scattered across the page)
|
||||
# Scattered matches indicate false positives from common words
|
||||
if len(rects) > 1:
|
||||
# Sort by vertical position then horizontal
|
||||
sorted_matches = sorted(matches, key=lambda w: (round(w[1]), w[0]))
|
||||
|
||||
# Check for large vertical gaps (more than ~2 lines apart)
|
||||
# A typical line height is 12-20 points
|
||||
max_line_gap = 50 # Points - allows for ~2-3 lines gap
|
||||
prev_y = sorted_matches[0][1]
|
||||
large_gaps = 0
|
||||
|
||||
for match in sorted_matches[1:]:
|
||||
y_gap = match[1] - prev_y
|
||||
if y_gap > max_line_gap:
|
||||
large_gaps += 1
|
||||
prev_y = match[1]
|
||||
|
||||
# If matches are scattered (many large gaps), reject this match
|
||||
# A chunk should be mostly contiguous text
|
||||
if large_gaps > len(matches) * 0.3: # More than 30% have gaps
|
||||
logger.debug(
|
||||
f"Rejecting scattered matches: {large_gaps} large gaps "
|
||||
f"out of {len(matches)} matches"
|
||||
)
|
||||
return 0
|
||||
|
||||
# Merge adjacent rectangles on the same line for cleaner highlighting
|
||||
merged_rects = []
|
||||
sorted_rects = sorted(rects, key=lambda r: (round(r.y0), r.x0))
|
||||
|
||||
current_rect = None
|
||||
for rect in sorted_rects:
|
||||
if current_rect is None:
|
||||
current_rect = rect
|
||||
elif abs(rect.y0 - current_rect.y0) < 5: # Same line (within 5 points)
|
||||
current_rect = current_rect | rect # Union
|
||||
else:
|
||||
merged_rects.append(current_rect)
|
||||
current_rect = rect
|
||||
|
||||
if current_rect:
|
||||
merged_rects.append(current_rect)
|
||||
|
||||
# Add highlights
|
||||
rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
|
||||
for rect in merged_rects:
|
||||
highlight = page.add_highlight_annot(rect)
|
||||
highlight.set_colors({"stroke": rgb})
|
||||
highlight.set_info(
|
||||
content="Chunk from semantic search",
|
||||
title="PDF Highlighter (word-position)",
|
||||
)
|
||||
highlight.update()
|
||||
|
||||
return len(merged_rects)
|
||||
|
||||
@staticmethod
|
||||
def find_unique_phrase(
|
||||
text: str, min_len: int = 30, max_len: int = 80
|
||||
) -> str | None:
|
||||
"""Find a relatively unique phrase from text for location search.
|
||||
|
||||
Looks for phrases that are likely to be unique on the page:
|
||||
- Prefers phrases with numbers or special terms
|
||||
- Avoids very common words
|
||||
|
||||
Args:
|
||||
text: Source text to extract phrase from
|
||||
min_len: Minimum phrase length
|
||||
max_len: Maximum phrase length
|
||||
|
||||
Returns:
|
||||
A phrase likely to be unique, or None if not found
|
||||
"""
|
||||
clean_text = PDFHighlighter.strip_markdown(text).strip()
|
||||
if not clean_text:
|
||||
return None
|
||||
|
||||
# Try first sentence (often unique due to context)
|
||||
sentences = re.split(r"[.!?]\s+", clean_text)
|
||||
for sentence in sentences:
|
||||
sentence = sentence.strip()
|
||||
if min_len <= len(sentence) <= max_len:
|
||||
return sentence
|
||||
elif len(sentence) > max_len:
|
||||
return sentence[:max_len]
|
||||
|
||||
# Fallback: first N chars
|
||||
if len(clean_text) >= min_len:
|
||||
return clean_text[:max_len]
|
||||
|
||||
return clean_text if clean_text else None
|
||||
|
||||
@staticmethod
|
||||
def _find_chunk_bbox(
|
||||
page: pymupdf.Page,
|
||||
chunk_text: str,
|
||||
page_relative_start: int,
|
||||
page_relative_end: int,
|
||||
page_text_length: int,
|
||||
) -> tuple[float, float, float, float] | None:
|
||||
"""Find bounding box for a chunk without modifying the page.
|
||||
|
||||
Returns (x0, y0, x1, y1) in page coordinates, or None if not found.
|
||||
"""
|
||||
page_rect = page.rect
|
||||
|
||||
# Strip markdown for searching
|
||||
search_text = PDFHighlighter.strip_markdown(chunk_text)
|
||||
|
||||
# Try to find chunk location using text search
|
||||
anchor_rect = None
|
||||
search_phrases = []
|
||||
|
||||
# Build search phrases from chunk text
|
||||
sentences = re.split(r"[.!?]\s+", search_text)
|
||||
for sentence in sentences[:3]:
|
||||
sentence = sentence.strip()
|
||||
if len(sentence) >= 20:
|
||||
search_phrases.append(sentence[:80])
|
||||
if len(sentence) >= 40:
|
||||
search_phrases.append(sentence[:40])
|
||||
|
||||
# Also try first N characters
|
||||
if len(search_text) >= 30:
|
||||
search_phrases.append(search_text[:60])
|
||||
search_phrases.append(search_text[:30])
|
||||
|
||||
for phrase in search_phrases:
|
||||
if not phrase:
|
||||
continue
|
||||
rects = page.search_for(phrase.strip())
|
||||
if rects:
|
||||
anchor_rect = rects[0]
|
||||
break
|
||||
|
||||
if not anchor_rect:
|
||||
return None
|
||||
|
||||
# Calculate chunk height based on character count
|
||||
chunk_chars = len(search_text)
|
||||
estimated_lines = max(1, chunk_chars / 60)
|
||||
estimated_height = estimated_lines * 14
|
||||
|
||||
# Build bounding box
|
||||
return (
|
||||
page_rect.x0 + 30, # Left margin
|
||||
anchor_rect.y0 - 5, # Start slightly above anchor
|
||||
page_rect.x1 - 30, # Right margin
|
||||
min(anchor_rect.y0 + estimated_height + 10, page_rect.y1 - 30),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunk_on_page(
|
||||
page: pymupdf.Page,
|
||||
chunk_text: str,
|
||||
color: str = "yellow",
|
||||
page_relative_start: int | None = None,
|
||||
page_relative_end: int | None = None,
|
||||
page_text_length: int | None = None,
|
||||
) -> int:
|
||||
"""Add bounding box highlight to a PDF page for the given chunk text.
|
||||
|
||||
Uses text search to find the chunk's location on the page, then draws
|
||||
a bounding box around that region. Falls back to character offset estimation
|
||||
if text search fails.
|
||||
|
||||
Args:
|
||||
page: PyMuPDF page object
|
||||
chunk_text: Text to highlight (may contain markdown)
|
||||
color: Color name from COLORS dict
|
||||
page_relative_start: Character offset where chunk starts on page (optional)
|
||||
page_relative_end: Character offset where chunk ends on page (optional)
|
||||
page_text_length: Total character length of page text (optional)
|
||||
|
||||
Returns:
|
||||
Number of highlights added (1 for bounding box, 0 if failed)
|
||||
"""
|
||||
page_rect = page.rect
|
||||
rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
|
||||
|
||||
# Strip markdown for searching
|
||||
search_text = PDFHighlighter.strip_markdown(chunk_text)
|
||||
|
||||
# Try to find chunk location using text search
|
||||
# Search for progressively shorter phrases until we find a match
|
||||
anchor_rect = None
|
||||
search_phrases = []
|
||||
|
||||
# Build search phrases from chunk text
|
||||
sentences = re.split(r"[.!?]\s+", search_text)
|
||||
for sentence in sentences[:3]: # Try first 3 sentences
|
||||
sentence = sentence.strip()
|
||||
if len(sentence) >= 20:
|
||||
search_phrases.append(sentence[:80])
|
||||
if len(sentence) >= 40:
|
||||
search_phrases.append(sentence[:40])
|
||||
|
||||
# Also try first N characters
|
||||
if len(search_text) >= 30:
|
||||
search_phrases.append(search_text[:60])
|
||||
search_phrases.append(search_text[:30])
|
||||
|
||||
for phrase in search_phrases:
|
||||
if not phrase:
|
||||
continue
|
||||
rects = page.search_for(phrase.strip())
|
||||
if rects:
|
||||
anchor_rect = rects[0] # Use first match
|
||||
logger.debug(f"Found chunk anchor using phrase: '{phrase[:30]}...'")
|
||||
break
|
||||
|
||||
if not anchor_rect:
|
||||
page_num = page.number + 1 if page.number is not None else "unknown"
|
||||
logger.warning(f"Could not find chunk text on page {page_num}")
|
||||
return 0
|
||||
|
||||
# Calculate chunk height based on character count
|
||||
# Estimate ~15 chars per line, ~12pt line height
|
||||
chunk_chars = len(search_text)
|
||||
estimated_lines = max(1, chunk_chars / 60) # ~60 chars per line typical
|
||||
estimated_height = estimated_lines * 14 # ~14pt per line
|
||||
|
||||
# Build bounding box starting from anchor
|
||||
chunk_rect = pymupdf.Rect(
|
||||
page_rect.x0 + 30, # Left margin
|
||||
anchor_rect.y0 - 5, # Start slightly above anchor
|
||||
page_rect.x1 - 30, # Right margin
|
||||
min(
|
||||
anchor_rect.y0 + estimated_height + 10, page_rect.y1 - 30
|
||||
), # Estimated bottom
|
||||
)
|
||||
|
||||
# Draw a visible rectangle around the chunk region
|
||||
shape = page.new_shape()
|
||||
shape.draw_rect(chunk_rect)
|
||||
shape.finish(
|
||||
color=rgb, # Border color
|
||||
fill=None, # No fill (transparent)
|
||||
width=2.5, # Border width
|
||||
dashes="[4 2]", # Dashed line
|
||||
)
|
||||
shape.commit()
|
||||
|
||||
# Add semi-transparent fill for visibility
|
||||
fill_shape = page.new_shape()
|
||||
fill_shape.draw_rect(chunk_rect)
|
||||
fill_shape.finish(
|
||||
color=None, # No border
|
||||
fill=[1, 1, 0.7], # Light yellow fill
|
||||
fill_opacity=0.15, # Very transparent
|
||||
)
|
||||
fill_shape.commit()
|
||||
|
||||
logger.debug(
|
||||
f"Added bounding box at y={chunk_rect.y0:.0f}-{chunk_rect.y1:.0f} "
|
||||
f"(estimated {estimated_lines:.1f} lines)"
|
||||
)
|
||||
|
||||
return 1
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunk(
|
||||
pdf_bytes: bytes,
|
||||
chunk_start_offset: int,
|
||||
chunk_end_offset: int,
|
||||
stored_page_number: Optional[int] = None,
|
||||
color: str = "yellow",
|
||||
zoom: float = 2.0,
|
||||
) -> Optional[tuple[bytes, int, int]]:
|
||||
"""Generate PNG image of PDF page with highlighted chunk.
|
||||
|
||||
This is the main entry point for highlighting. It:
|
||||
1. Extracts document text with page boundaries
|
||||
2. Finds which page contains the chunk
|
||||
3. Extracts chunk text using character offsets
|
||||
4. Highlights the chunk on the page
|
||||
5. Renders page to PNG
|
||||
|
||||
Args:
|
||||
pdf_bytes: PDF file bytes
|
||||
chunk_start_offset: Chunk start position (document-level)
|
||||
chunk_end_offset: Chunk end position (document-level)
|
||||
stored_page_number: Page number from metadata (optional, for validation)
|
||||
color: Highlight color name
|
||||
zoom: Rendering zoom factor (2.0 = 144 DPI)
|
||||
|
||||
Returns:
|
||||
Tuple of (png_bytes, page_number, highlight_count) or None if failed
|
||||
"""
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
temp_pdf_path = None
|
||||
try:
|
||||
# Write PDF to temp file with consistent name "pdf.pdf"
|
||||
# This ensures image references match indexing (e.g., pdf-0001.png)
|
||||
# Different temp filenames would cause different markdown text lengths!
|
||||
temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_"))
|
||||
temp_pdf_path = temp_dir / "pdf.pdf"
|
||||
temp_pdf_path.write_bytes(pdf_bytes)
|
||||
|
||||
# Open PDF from temp file
|
||||
doc = pymupdf.open(temp_pdf_path)
|
||||
|
||||
# Extract text with page boundaries
|
||||
full_text, page_boundaries = (
|
||||
PDFHighlighter.extract_pdf_text_with_boundaries(doc)
|
||||
)
|
||||
|
||||
# Find which page contains the chunk
|
||||
chunk_page_info = PDFHighlighter.find_chunk_page(
|
||||
chunk_start_offset, chunk_end_offset, page_boundaries
|
||||
)
|
||||
|
||||
if not chunk_page_info:
|
||||
logger.error("Chunk not found on any page")
|
||||
doc.close()
|
||||
return None
|
||||
|
||||
page_num = chunk_page_info["page_num"]
|
||||
|
||||
# Log if page differs from stored metadata
|
||||
if stored_page_number and stored_page_number != page_num:
|
||||
logger.info(
|
||||
f"Chunk primarily on page {page_num}, metadata says {stored_page_number}"
|
||||
)
|
||||
|
||||
# Extract page text
|
||||
page_boundary = page_boundaries[page_num - 1]
|
||||
page_start = page_boundary["start_offset"]
|
||||
page_end = page_boundary["end_offset"]
|
||||
page_text = full_text[page_start:page_end]
|
||||
|
||||
# Extract chunk text using page-relative offsets
|
||||
page_relative_start = chunk_page_info["page_relative_start"]
|
||||
page_relative_end = chunk_page_info["page_relative_end"]
|
||||
chunk_text = page_text[page_relative_start:page_relative_end]
|
||||
|
||||
# Calculate page text length for region estimation
|
||||
page_text_length = page_end - page_start
|
||||
|
||||
logger.debug(
|
||||
f"Extracted {len(chunk_text)} chars on page {page_num} "
|
||||
f"(offsets {page_relative_start}-{page_relative_end} of {page_text_length})"
|
||||
)
|
||||
|
||||
# Get page and add highlights
|
||||
page = doc[page_num - 1]
|
||||
highlight_count = PDFHighlighter.highlight_chunk_on_page(
|
||||
page,
|
||||
chunk_text,
|
||||
color,
|
||||
page_relative_start=page_relative_start,
|
||||
page_relative_end=page_relative_end,
|
||||
page_text_length=page_text_length,
|
||||
)
|
||||
|
||||
if highlight_count == 0:
|
||||
logger.warning("No highlights added")
|
||||
doc.close()
|
||||
return None
|
||||
|
||||
# Render page to PNG
|
||||
mat = pymupdf.Matrix(zoom, zoom)
|
||||
pix = page.get_pixmap(matrix=mat, alpha=False)
|
||||
png_bytes = pix.tobytes("png")
|
||||
|
||||
doc.close()
|
||||
|
||||
logger.info(
|
||||
f"Generated {len(png_bytes):,} byte image with {highlight_count} highlights"
|
||||
)
|
||||
|
||||
return (png_bytes, page_num, highlight_count)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error highlighting chunk: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
finally:
|
||||
# Clean up temp directory and PDF file
|
||||
if temp_pdf_path and temp_pdf_path.parent.exists():
|
||||
try:
|
||||
import shutil
|
||||
|
||||
shutil.rmtree(temp_pdf_path.parent)
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to delete temp directory {temp_pdf_path.parent}: {e}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def highlight_chunks_batch(
|
||||
pdf_bytes: bytes,
|
||||
chunks: list[tuple[int, int, int, int | None, str]],
|
||||
page_boundaries: list[dict],
|
||||
full_text: str,
|
||||
color: str = "yellow",
|
||||
zoom: float = 2.0,
|
||||
) -> dict[int, tuple[bytes, int, int]]:
|
||||
"""Generate highlighted images for multiple chunks.
|
||||
|
||||
Opens PDF once for rendering, uses pre-computed page boundaries from the
|
||||
document processor. This ensures consistent character offsets between
|
||||
chunking and highlighting.
|
||||
|
||||
Args:
|
||||
pdf_bytes: PDF file bytes
|
||||
chunks: List of (chunk_index, start_offset, end_offset, stored_page_number, chunk_text)
|
||||
The chunk_index is used as the key in the returned dict.
|
||||
chunk_text is the actual text content of the chunk.
|
||||
page_boundaries: Pre-computed page boundaries from document processor.
|
||||
Each entry: {"page": 1, "start_offset": 0, "end_offset": 1234}
|
||||
full_text: Full document text for extracting page-relative portions.
|
||||
color: Highlight color name
|
||||
zoom: Rendering zoom factor (2.0 = 144 DPI)
|
||||
|
||||
Returns:
|
||||
Dict mapping chunk_index to (png_bytes, page_number, highlight_count)
|
||||
Chunks that fail to highlight are omitted from the result.
|
||||
"""
|
||||
import shutil
|
||||
import tempfile
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
results: dict[int, tuple[bytes, int, int]] = {}
|
||||
|
||||
if not chunks:
|
||||
return results
|
||||
|
||||
temp_pdf_path = None
|
||||
try:
|
||||
# Write PDF to temp file
|
||||
temp_dir = Path(tempfile.mkdtemp(prefix="pdf_highlight_batch_"))
|
||||
temp_pdf_path = temp_dir / "pdf.pdf"
|
||||
temp_pdf_path.write_bytes(pdf_bytes)
|
||||
|
||||
# Open PDF once (only for rendering, not text extraction)
|
||||
doc = pymupdf.open(temp_pdf_path)
|
||||
|
||||
logger.debug(
|
||||
f"Batch highlighting: {len(chunks)} chunks, "
|
||||
f"{len(page_boundaries)} pages"
|
||||
)
|
||||
|
||||
# Group chunks by their target page for efficient rendering
|
||||
# We'll render each page only once with all its highlights
|
||||
chunks_by_page: dict[int, list[tuple[int, dict, str]]] = defaultdict(list)
|
||||
|
||||
for chunk_tuple in chunks:
|
||||
# Unpack chunk tuple - chunk_text is now passed directly
|
||||
chunk_index, start_offset, end_offset, stored_page_num, chunk_text = (
|
||||
chunk_tuple
|
||||
)
|
||||
|
||||
# Find which page contains this chunk
|
||||
chunk_page_info = PDFHighlighter.find_chunk_page(
|
||||
start_offset, end_offset, page_boundaries
|
||||
)
|
||||
|
||||
if not chunk_page_info:
|
||||
logger.warning(f"Chunk {chunk_index}: not found on any page")
|
||||
continue
|
||||
|
||||
page_num = chunk_page_info["page_num"]
|
||||
|
||||
# Log if page differs from stored metadata
|
||||
if stored_page_num and stored_page_num != page_num:
|
||||
logger.debug(
|
||||
f"Chunk {chunk_index}: found on page {page_num}, "
|
||||
f"metadata says {stored_page_num}"
|
||||
)
|
||||
|
||||
# Extract page-relative portion of chunk text
|
||||
# This is critical for cross-page chunks where the start
|
||||
# of the chunk might be on a different page
|
||||
page_boundary = page_boundaries[page_num - 1]
|
||||
page_start = page_boundary["start_offset"]
|
||||
page_end = page_boundary["end_offset"]
|
||||
page_text_length = page_end - page_start
|
||||
|
||||
# Calculate what portion of the chunk appears on this page
|
||||
chunk_start_on_page = max(start_offset, page_start)
|
||||
chunk_end_on_page = min(end_offset, page_end)
|
||||
|
||||
# Extract just the text that appears on this page
|
||||
page_relative_text = full_text[chunk_start_on_page:chunk_end_on_page]
|
||||
|
||||
chunks_by_page[page_num].append(
|
||||
(chunk_index, chunk_page_info, page_relative_text, page_text_length)
|
||||
)
|
||||
|
||||
logger.debug(
|
||||
f"Chunks distributed across {len(chunks_by_page)} unique pages"
|
||||
)
|
||||
|
||||
# OPTIMIZATION: Render each page ONCE, then draw highlights using PIL
|
||||
# This avoids expensive page.get_pixmap() calls per chunk
|
||||
from io import BytesIO
|
||||
|
||||
from PIL import Image, ImageDraw
|
||||
|
||||
# PIL color for bounding box (RGB tuple)
|
||||
rgb = PDFHighlighter.COLORS.get(color, PDFHighlighter.COLORS["yellow"])
|
||||
pil_color = tuple(int(c * 255) for c in rgb)
|
||||
fill_color = (255, 255, 178, 38) # Light yellow with alpha
|
||||
|
||||
for page_num, page_chunks in chunks_by_page.items():
|
||||
page = doc[page_num - 1]
|
||||
|
||||
# Render page ONCE to get base image (most expensive operation)
|
||||
mat = pymupdf.Matrix(zoom, zoom)
|
||||
base_pix = page.get_pixmap(matrix=mat, alpha=False)
|
||||
base_png = base_pix.tobytes("png")
|
||||
|
||||
# Convert to PIL Image for fast highlight drawing
|
||||
base_image = Image.open(BytesIO(base_png)).convert("RGBA")
|
||||
page_rect = page.rect
|
||||
|
||||
logger.debug(
|
||||
f"Page {page_num}: rendered once, processing {len(page_chunks)} chunks"
|
||||
)
|
||||
|
||||
for (
|
||||
chunk_index,
|
||||
chunk_page_info,
|
||||
chunk_text,
|
||||
page_text_length,
|
||||
) in page_chunks:
|
||||
try:
|
||||
# Find chunk bounding box using text search
|
||||
bbox = PDFHighlighter._find_chunk_bbox(
|
||||
page,
|
||||
chunk_text,
|
||||
chunk_page_info["page_relative_start"],
|
||||
chunk_page_info["page_relative_end"],
|
||||
page_text_length,
|
||||
)
|
||||
|
||||
if bbox is None:
|
||||
logger.warning(f"Chunk {chunk_index}: could not find bbox")
|
||||
continue
|
||||
|
||||
# Copy base image for this chunk
|
||||
chunk_image = base_image.copy()
|
||||
|
||||
# Scale bbox coordinates to pixmap coordinates
|
||||
scale_x = base_pix.width / page_rect.width
|
||||
scale_y = base_pix.height / page_rect.height
|
||||
pil_bbox = (
|
||||
int(bbox[0] * scale_x),
|
||||
int(bbox[1] * scale_y),
|
||||
int(bbox[2] * scale_x),
|
||||
int(bbox[3] * scale_y),
|
||||
)
|
||||
|
||||
# Create transparent overlay for fill (proper alpha blending)
|
||||
overlay = Image.new("RGBA", chunk_image.size, (0, 0, 0, 0))
|
||||
overlay_draw = ImageDraw.Draw(overlay)
|
||||
overlay_draw.rectangle(pil_bbox, fill=fill_color)
|
||||
|
||||
# Alpha composite the overlay onto the chunk image
|
||||
chunk_image = Image.alpha_composite(chunk_image, overlay)
|
||||
|
||||
# Draw border on top (solid, not transparent)
|
||||
border_draw = ImageDraw.Draw(chunk_image)
|
||||
border_draw.rectangle(pil_bbox, outline=pil_color, width=3)
|
||||
|
||||
# Convert back to PNG bytes
|
||||
output = BytesIO()
|
||||
chunk_image.convert("RGB").save(output, format="PNG")
|
||||
png_bytes = output.getvalue()
|
||||
|
||||
results[chunk_index] = (png_bytes, page_num, 1)
|
||||
|
||||
logger.debug(
|
||||
f"Chunk {chunk_index}: {len(png_bytes):,} bytes, "
|
||||
f"page {page_num}, bbox {pil_bbox}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Chunk {chunk_index}: error - {e}")
|
||||
continue
|
||||
|
||||
doc.close()
|
||||
|
||||
logger.info(
|
||||
f"Batch highlighted {len(results)}/{len(chunks)} chunks successfully"
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in batch highlighting: {e}", exc_info=True)
|
||||
return results
|
||||
|
||||
finally:
|
||||
# Clean up temp directory
|
||||
if temp_pdf_path and temp_pdf_path.parent.exists():
|
||||
try:
|
||||
shutil.rmtree(temp_pdf_path.parent)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to clean up temp dir: {e}")
|
||||
@@ -9,7 +9,6 @@ from nextcloud_mcp_server.config import get_settings
|
||||
from nextcloud_mcp_server.embedding import get_embedding_service
|
||||
from nextcloud_mcp_server.observability.metrics import record_qdrant_operation
|
||||
from nextcloud_mcp_server.search.algorithms import SearchAlgorithm, SearchResult
|
||||
from nextcloud_mcp_server.vector.placeholder import get_placeholder_filter
|
||||
from nextcloud_mcp_server.vector.qdrant_client import get_qdrant_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -51,9 +50,6 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
Returns unverified results from Qdrant. Access verification should be
|
||||
performed separately at the final output stage using verify_search_results().
|
||||
|
||||
Deduplicates by (doc_id, doc_type, chunk_start_offset, chunk_end_offset)
|
||||
to show multiple chunks from the same document while avoiding duplicate chunks.
|
||||
|
||||
Args:
|
||||
query: Natural language search query
|
||||
user_id: User ID for filtering
|
||||
@@ -78,19 +74,16 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
# Generate embedding for query
|
||||
embedding_service = get_embedding_service()
|
||||
query_embedding = await embedding_service.embed(query)
|
||||
# Store for reuse by callers (e.g., viz_routes PCA visualization)
|
||||
self.query_embedding = query_embedding
|
||||
logger.debug(
|
||||
f"Generated embedding for query (dimension={len(query_embedding)})"
|
||||
)
|
||||
|
||||
# Build Qdrant filter
|
||||
filter_conditions = [
|
||||
get_placeholder_filter(), # Always exclude placeholders from user-facing queries
|
||||
FieldCondition(
|
||||
key="user_id",
|
||||
match=MatchValue(value=user_id),
|
||||
),
|
||||
)
|
||||
]
|
||||
|
||||
# Add doc_type filter if specified
|
||||
@@ -130,41 +123,20 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
top_scores = [p.score for p in search_response.points[:3]]
|
||||
logger.debug(f"Top 3 similarity scores: {top_scores}")
|
||||
|
||||
# Deduplicate by (doc_id, doc_type, chunk_start, chunk_end)
|
||||
# This allows multiple chunks from same doc, but removes duplicate chunks
|
||||
seen_chunks = set()
|
||||
# Deduplicate by (doc_id, doc_type) - multiple chunks per document
|
||||
seen_docs = set()
|
||||
results = []
|
||||
|
||||
for result in search_response.points:
|
||||
if result.payload is None:
|
||||
continue
|
||||
# doc_id can be int (notes) or str (files - file paths)
|
||||
doc_id = result.payload["doc_id"]
|
||||
doc_id = int(result.payload["doc_id"])
|
||||
doc_type = result.payload.get("doc_type", "note")
|
||||
chunk_start = result.payload.get("chunk_start_offset")
|
||||
chunk_end = result.payload.get("chunk_end_offset")
|
||||
chunk_key = (doc_id, doc_type, chunk_start, chunk_end)
|
||||
doc_key = (doc_id, doc_type)
|
||||
|
||||
# Skip if we've already seen this exact chunk
|
||||
if chunk_key in seen_chunks:
|
||||
# Skip if we've already seen this document
|
||||
if doc_key in seen_docs:
|
||||
continue
|
||||
|
||||
seen_chunks.add(chunk_key)
|
||||
|
||||
# Build metadata dict with common fields
|
||||
metadata = {
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
}
|
||||
|
||||
# Add file-specific metadata for PDF viewer
|
||||
if doc_type == "file" and (path := result.payload.get("file_path")):
|
||||
metadata["path"] = path
|
||||
|
||||
# Add deck_card-specific metadata for frontend URL construction
|
||||
if doc_type == "deck_card":
|
||||
if board_id := result.payload.get("board_id"):
|
||||
metadata["board_id"] = board_id
|
||||
seen_docs.add(doc_key)
|
||||
|
||||
# Return unverified results (verification happens at output stage)
|
||||
results.append(
|
||||
@@ -174,14 +146,12 @@ class SemanticSearchAlgorithm(SearchAlgorithm):
|
||||
title=result.payload.get("title", "Untitled"),
|
||||
excerpt=result.payload.get("excerpt", ""),
|
||||
score=result.score,
|
||||
metadata=metadata,
|
||||
metadata={
|
||||
"chunk_index": result.payload.get("chunk_index"),
|
||||
"total_chunks": result.payload.get("total_chunks"),
|
||||
},
|
||||
chunk_start_offset=result.payload.get("chunk_start_offset"),
|
||||
chunk_end_offset=result.payload.get("chunk_end_offset"),
|
||||
page_number=result.payload.get("page_number"),
|
||||
page_count=result.payload.get("page_count"),
|
||||
chunk_index=result.payload.get("chunk_index", 0),
|
||||
total_chunks=result.payload.get("total_chunks", 1),
|
||||
point_id=str(result.id), # Qdrant point ID for batch retrieval
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ from .calendar import configure_calendar_tools
|
||||
from .contacts import configure_contacts_tools
|
||||
from .cookbook import configure_cookbook_tools
|
||||
from .deck import configure_deck_tools
|
||||
from .news import configure_news_tools
|
||||
from .notes import configure_notes_tools
|
||||
from .semantic import configure_semantic_tools
|
||||
from .sharing import configure_sharing_tools
|
||||
@@ -14,7 +13,6 @@ __all__ = [
|
||||
"configure_contacts_tools",
|
||||
"configure_cookbook_tools",
|
||||
"configure_deck_tools",
|
||||
"configure_news_tools",
|
||||
"configure_notes_tools",
|
||||
"configure_semantic_tools",
|
||||
"configure_sharing_tools",
|
||||
|
||||
@@ -3,7 +3,6 @@ import logging
|
||||
from typing import Optional
|
||||
|
||||
from mcp.server.fastmcp import Context, FastMCP
|
||||
from mcp.types import ToolAnnotations
|
||||
|
||||
from nextcloud_mcp_server.auth import require_scopes
|
||||
from nextcloud_mcp_server.context import get_client
|
||||
@@ -20,10 +19,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
def configure_calendar_tools(mcp: FastMCP):
|
||||
# Calendar tools
|
||||
@mcp.tool(
|
||||
title="List Calendars",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_list_calendars(ctx: Context) -> ListCalendarsResponse:
|
||||
@@ -34,10 +30,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
calendars = [Calendar(**cal_data) for cal_data in calendars_data]
|
||||
return ListCalendarsResponse(calendars=calendars, total_count=len(calendars))
|
||||
|
||||
@mcp.tool(
|
||||
title="Create Calendar Event",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_create_event(
|
||||
@@ -114,10 +107,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.create_event(calendar_name, event_data)
|
||||
|
||||
@mcp.tool(
|
||||
title="List Calendar Events",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_list_events(
|
||||
@@ -220,10 +210,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return events
|
||||
|
||||
@mcp.tool(
|
||||
title="Get Calendar Event",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_get_event(
|
||||
@@ -236,10 +223,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
event_data, etag = await client.calendar.get_event(calendar_name, event_uid)
|
||||
return event_data
|
||||
|
||||
@mcp.tool(
|
||||
title="Update Calendar Event",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_update_event(
|
||||
@@ -313,12 +297,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
calendar_name, event_uid, event_data, etag
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Delete Calendar Event",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_delete_event(
|
||||
@@ -330,10 +309,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.calendar.delete_event(calendar_name, event_uid)
|
||||
|
||||
@mcp.tool(
|
||||
title="Create Meeting",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_create_meeting(
|
||||
@@ -400,10 +376,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.create_event(calendar_name, event_data)
|
||||
|
||||
@mcp.tool(
|
||||
title="Get Upcoming Events",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_get_upcoming_events(
|
||||
@@ -454,10 +427,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
all_events.sort(key=lambda x: x.get("start_datetime", ""))
|
||||
return all_events[:limit]
|
||||
|
||||
@mcp.tool(
|
||||
title="Find Availability",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_find_availability(
|
||||
@@ -538,10 +508,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
constraints=constraints,
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Bulk Calendar Operations",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_bulk_operations(
|
||||
@@ -791,10 +758,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
"results": results,
|
||||
}
|
||||
|
||||
@mcp.tool(
|
||||
title="Manage Calendar",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("calendar:write")
|
||||
@instrument_tool
|
||||
async def nc_calendar_manage_calendar(
|
||||
@@ -864,10 +828,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
# ============= Todo/Task Tools =============
|
||||
|
||||
@mcp.tool(
|
||||
title="List Todo Tasks",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("todo:read", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_list_todos(
|
||||
@@ -913,10 +874,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
todos=todos, calendar_name=calendar_name, total_count=len(todos)
|
||||
)
|
||||
|
||||
@mcp.tool(
|
||||
title="Create Todo Task",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("todo:write", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_create_todo(
|
||||
@@ -960,10 +918,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.create_todo(calendar_name, todo_data)
|
||||
|
||||
@mcp.tool(
|
||||
title="Update Todo Task",
|
||||
annotations=ToolAnnotations(idempotentHint=False, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("todo:write", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_update_todo(
|
||||
@@ -1024,12 +979,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
|
||||
return await client.calendar.update_todo(calendar_name, todo_uid, todo_data)
|
||||
|
||||
@mcp.tool(
|
||||
title="Delete Todo Task",
|
||||
annotations=ToolAnnotations(
|
||||
destructiveHint=True, idempotentHint=True, openWorldHint=True
|
||||
),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("todo:write", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_delete_todo(
|
||||
@@ -1050,10 +1000,7 @@ def configure_calendar_tools(mcp: FastMCP):
|
||||
client = await get_client(ctx)
|
||||
return await client.calendar.delete_todo(calendar_name, todo_uid)
|
||||
|
||||
@mcp.tool(
|
||||
title="Search Todo Tasks",
|
||||
annotations=ToolAnnotations(readOnlyHint=True, openWorldHint=True),
|
||||
)
|
||||
@mcp.tool()
|
||||
@require_scopes("todo:read", "calendar:read")
|
||||
@instrument_tool
|
||||
async def nc_calendar_search_todos(
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user